Spaces:

ipd
/

test

Sleeping

File size: 8,327 Bytes

79d2379

import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import xgboost as xgb
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

# Cap OpenMP at one active level of nested parallel regions.
# NOTE(review): this is set before `models.fm4m` is imported, presumably so the
# limit is in place when that module's native dependencies start up — confirm.
os.environ["OMP_MAX_ACTIVE_LEVELS"] = "1"

import models.fm4m as fm4m


# Function to create model based on user input
def _create_model(
    model_name, max_depth=None, n_estimators=None, alpha=None, degree=None, kernel=None
):
    if model_name == "XGBClassifier":
        model = xgb.XGBClassifier(
            objective='binary:logistic',
            eval_metric='auc',
            max_depth=max_depth,
            n_estimators=n_estimators,
            alpha=alpha,
        )
    elif model_name == "SVR":
        model = SVR(degree=degree, kernel=kernel)
    elif model_name == "Kernel Ridge":
        model = KernelRidge(alpha=alpha, degree=degree, kernel=kernel)
    elif model_name == "Linear Regression":
        model = LinearRegression()
    elif model_name == "Default - Auto":
        return "Default Settings"
    else:
        return "Model not supported."

    return f"{model_name} * {model.get_params()}"


# Function to handle model creation based on input parameters
def create_downstream_model(model_name, max_depth, n_estimators, alpha, degree, kernel):
    """Build the downstream-model description for the chosen model.

    Only the hyper-parameters relevant to ``model_name`` are forwarded to
    ``_create_model``; the rest are ignored.

    Args:
        model_name: Name of the downstream model selected by the caller.
        max_depth: Forwarded for "XGBClassifier" only.
        n_estimators: Forwarded for "XGBClassifier" only.
        alpha: Forwarded for "XGBClassifier" and "Kernel Ridge".
        degree: Forwarded for "SVR" and "Kernel Ridge".
        kernel: Forwarded for "SVR" and "Kernel Ridge".

    Returns:
        The string produced by ``_create_model``. An unrecognised name yields
        that function's "Model not supported." message; it previously fell
        through and returned ``None``.
    """
    if model_name == "XGBClassifier":
        return _create_model(
            model_name,
            max_depth=max_depth,
            n_estimators=n_estimators,
            alpha=alpha,
        )
    if model_name == "SVR":
        return _create_model(model_name, degree=degree, kernel=kernel)
    if model_name == "Kernel Ridge":
        return _create_model(model_name, alpha=alpha, degree=degree, kernel=kernel)

    # "Linear Regression" and "Default - Auto" take no hyper-parameters. Every
    # other name is delegated as well so the caller always receives a string.
    return _create_model(model_name)


# Function to display evaluation score
def display_eval(selected_models, dataset, task_type, downstream, fusion_type, state):
    """Run the downstream evaluation and return its result text.

    Args:
        selected_models: Sequence of model identifiers. A single entry is
            evaluated with ``fm4m.single_modal``; more than one with
            ``fm4m.multi_modal``.
        dataset: Forwarded unchanged to the ``fm4m`` call.
        task_type: "Classification" or "Regression".
        downstream: ``"<model name> * <params dict repr>"`` or a bare name
            such as "Default Settings". Non-string values are treated as an
            empty description.
        fusion_type: Unused; kept so the signature stays stable for callers.
        state: Mutable mapping that receives the evaluation outputs. For
            classification the keys "roc_auc", "fpr", "tpr", "x_batch" and
            "y_batch" are written; for regression "RMSE", "y_batch_test",
            "y_prob", "x_batch" and "y_batch".

    Returns:
        The first value returned by the ``fm4m`` call when it is truthy;
        "Please select at least one enabled model." when nothing is selected;
        "An error occurred: ..." when evaluation raises; otherwise
        "Data & Model Setting is incorrect".
    """
    downstream_model, params = _parse_downstream(downstream)
    result = None

    try:
        if not selected_models:
            return "Please select at least one enabled model."

        use_fusion = len(selected_models) > 1

        if task_type == "Classification":
            default_name = "DefaultClassifier"
            state_keys = ("roc_auc", "fpr", "tpr", "x_batch", "y_batch")
        elif task_type == "Regression":
            default_name = "DefaultRegressor"
            state_keys = ("RMSE", "y_batch_test", "y_prob", "x_batch", "y_batch")
        else:
            return "Data & Model Setting is incorrect"

        if downstream_model == "Default Settings":
            downstream_model = default_name
            params = None

        if use_fusion:
            outputs = fm4m.multi_modal(
                model_list=selected_models,
                downstream_model=downstream_model,
                params=params,
                dataset=dataset,
            )
        else:
            outputs = fm4m.single_modal(
                model=selected_models[0],
                downstream_model=downstream_model,
                params=params,
                dataset=dataset,
            )

        # Explicit six-way unpack: a wrong-sized return raises ValueError
        # before any state key is written.
        result, first, second, third, fourth, fifth = outputs
        for key, value in zip(state_keys, (first, second, third, fourth, fifth)):
            state[key] = value

    except Exception as e:
        return f"An error occurred: {e}"
    return result or "Data & Model Setting is incorrect"


def _parse_downstream(downstream):
    """Split a downstream description into ``(model_name, params)``.

    ``params`` is ``None`` whenever the text after the last ``*`` cannot be
    evaluated, which includes descriptions that carry no parameter part.
    """
    if not isinstance(downstream, str):
        return "", None

    model_name = downstream.partition("*")[0].strip()
    raw_params = downstream.rpartition("*")[2].strip()
    # The repr of a parameter dict spells NaN as a bare `nan`, which is not a
    # name eval() can resolve.
    raw_params = raw_params.replace("nan", "float('nan')")

    try:
        # SECURITY: eval() executes arbitrary Python. `downstream` arrives as
        # text from the caller; if that text can be edited by an end user this
        # is a code-injection risk and needs a restricted parser.
        params = eval(raw_params)
    except Exception:
        params = None
    return model_name, params


# Function to handle plot display
def display_plot(plot_type, state):
    """Build the matplotlib figure for the requested plot type.

    Missing or malformed metrics never raise for the ROC and parity plots, and
    missing data never raises for the latent-space plot; the axes are simply
    returned without the corresponding artists.

    Args:
        plot_type: "Latent Space", "ROC-AUC" or "Parity Plot". Any other value
            yields an empty set of axes.
        state: Mapping (must support ``.get``) holding evaluation outputs.
            "Latent Space" reads "x_batch" and "y_batch", which are indexed as
            ``[:, 0]`` and ``[:, 1]``. "ROC-AUC" reads "roc_auc", "fpr" and
            "tpr". "Parity Plot" reads "RMSE", "y_batch_test" and "y_prob".

    Returns:
        The matplotlib figure. It is removed from pyplot's figure registry
        before being returned.
    """
    fig, ax = plt.subplots()

    if plot_type == "Latent Space":
        x_batch, y_batch = state.get("x_batch"), state.get("y_batch")
        if x_batch is not None and y_batch is not None:
            # Draw on `ax` directly rather than through pyplot's global
            # "current axes", which may belong to another figure.
            ax.scatter(y_batch[:, 0], y_batch[:, 1], c='red', label='Class 1')
            ax.scatter(x_batch[:, 0], x_batch[:, 1], c='blue', label='Class 0')

        ax.set_xlabel('Feature 1')
        ax.set_ylabel('Feature 2')
        ax.set_title('Dataset Distribution')

    elif plot_type == "ROC-AUC":
        roc_auc, fpr, tpr = state.get("roc_auc"), state.get("fpr"), state.get("tpr")
        if roc_auc is not None and fpr is not None and tpr is not None:
            try:
                ax.plot(
                    fpr,
                    tpr,
                    color='darkorange',
                    lw=2,
                    label=f'ROC curve (area = {roc_auc:.4f})',
                )
                ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
                ax.set_xlim([0.0, 1.0])
                ax.set_ylim([0.0, 1.05])
            except Exception:
                # Best effort: malformed metrics leave the curve off instead
                # of failing the whole callback.
                pass
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('Receiver Operating Characteristic')
        if ax.get_legend_handles_labels()[0]:
            ax.legend(loc='lower right')

    elif plot_type == "Parity Plot":
        RMSE, y_batch_test, y_prob = (
            state.get("RMSE"),
            state.get("y_batch_test"),
            state.get("y_prob"),
        )
        ax.set_title("Parity plot")

        if RMSE is not None and y_batch_test is not None and y_prob is not None:
            try:
                actual = np.array(y_batch_test, dtype=float)
                predicted = np.array(y_prob, dtype=float)
                ax.scatter(
                    actual,
                    predicted,
                    color="blue",
                    label=f"Predicted vs Actual (RMSE: {RMSE:.4f})",
                )
                # Diagonal y = x spanning the combined data range.
                low = min(actual.min(), predicted.min())
                high = max(actual.max(), predicted.max())
                ax.plot([low, high], [low, high], 'r-')
            except Exception:
                # Best effort: values that cannot be converted or plotted
                # leave the remaining artists off.
                pass

        ax.set_xlabel('Actual Values')
        ax.set_ylabel('Predicted Values')

        if ax.get_legend_handles_labels()[0]:
            ax.legend(loc='lower right')

    # pyplot keeps every figure it creates alive until it is closed. Dropping
    # it from the registry stops a long-running process from accumulating
    # figures; the Figure object itself can still be rendered by the caller.
    plt.close(fig)
    return fig


# Function to handle evaluation and logging
def evaluate_and_log(models, dataset, task_type, eval_output, state):
    """Prepend one evaluation record to the running log and return the log.

    Args:
        models: Selected models; stored as ``str(models)``.
        dataset: Dataset identifier, stored as given.
        task_type: "Classification" is logged as "CLS" and "Regression" as
            "RGR". Any other value is logged as ``str(task_type)``.
        eval_output: Evaluation text; every " Score" substring is removed
            before it is stored. Non-string values are converted with ``str``.
        state: Mapping holding the log under "log_df". A missing or ``None``
            entry starts a fresh log.

    Returns:
        The updated log DataFrame (also stored in ``state["log_df"]``), newest
        entry first, with a fresh 0..n-1 index.
    """
    task_codes = {'Classification': 'CLS', 'Regression': 'RGR'}

    entry = pd.DataFrame(
        [
            {
                "Selected Models": str(models),
                "Dataset": dataset,
                "Task": task_codes.get(task_type, str(task_type)),
                "Result": str(eval_output).replace(" Score", ""),
            }
        ]
    )

    history = state.get("log_df")
    frames = [entry] if history is None else [entry, history]
    # ignore_index keeps row labels unique; otherwise every row is labelled 0.
    state["log_df"] = pd.concat(frames, ignore_index=True)
    return state["log_df"]