import gradio as gr
import joblib
import pandas as pd
import datasets
import json
import numpy as np

# Load the model
pipe = joblib.load("./model.pkl")
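# Note: model.pkl is assumed to be a fitted scikit-learn estimator/Pipeline exposing
# .predict(); it must accept a DataFrame with the columns listed in config.json
# under ["sklearn"]["columns"] (loaded below).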

title = "Premium Amount Prediction"
description = "This model predicts the Premium Amount. Drag and drop any slice from the dataset or edit values as you wish in the dataframe component below."

# Load and prepare dataset
df = datasets.load_dataset("silvaKenpachi/mental_health")["train"].to_pandas()
df.dropna(axis=0, inplace=True)

# Load configuration
with open("./config.json") as f:
    config_dict = json.load(f)
all_headers = config_dict["sklearn"]["columns"]
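
# For reference, config.json is assumed to look roughly like the following
# (structure inferred from the access pattern above; the actual column names
# depend on how the model was trained):
# {
#     "sklearn": {
#         "columns": ["Name", "...", "..."]
#     }
# }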

# Filter headers to only include those present in the dataset
headers = [col for col in all_headers if col in df.columns]
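# Columns listed in config.json but missing from the dataset are skipped here and
# re-added with a default value of 0 inside infer() below.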

# Define input and output interfaces
#inputs = [gr.Dataframe(headers=headers, row_count=(2, "dynamic"), col_count=(len(headers), "fixed"), label="Input Data", interactive=True)]

# Earlier working version that returned only 2 rows in the output
#inputs = [gr.Dataframe(headers=all_headers, row_count=(2, "dynamic"), col_count=(len(all_headers), "fixed"), label="Input Data", interactive=True)]
#outputs = [gr.Dataframe(row_count=(2, "dynamic"), col_count=(1, "fixed"), label="Predictions", headers=["Depression"])]

# Define input and output interfaces with dynamic row counts
inputs = [gr.Dataframe(
    headers=headers,
    row_count=(10, "dynamic"),  # Use tuple format (min_rows, "dynamic")
    col_count=(len(headers), "fixed"),
    label="Input Data",
    interactive=True
)]

outputs = [gr.Dataframe(
    row_count=(10, "dynamic"),  # Use tuple format (min_rows, "dynamic")
    col_count=(2, "fixed"),
    label="Predictions",
    headers=["Name", "Depression"]
)]


#def infer(inputs):
    #data = pd.DataFrame(inputs, columns=headers)
    #predictions = pipe.predict(data)
    #return pd.DataFrame(predictions, columns=["Depression"])

# Earlier attempt: fill missing columns and NaN values with defaults
#def infer(inputs):
    #data = pd.DataFrame(inputs, columns=headers)
    # Add missing columns with default values (e.g., 0)
    #for col in all_headers:
        #if col not in data.columns:
            #data[col] = 0
    # Ensure the order of columns matches the training data
    #data = data[all_headers]
    #predictions = pipe.predict(data)
    #return pd.DataFrame(predictions, columns=["Depression"])


#def infer(inputs):
    #data = pd.DataFrame(inputs, columns=headers)
    
    # Replace empty strings with NaN
    #data = data.replace('', np.nan)
    
    # Add missing columns with default values (e.g., 0)
    #for col in all_headers:
        #if col not in data.columns:
            #data[col] = 0
    
    # Ensure the order of columns matches the training data
    #data = data[all_headers]
    
    # Fill NaN values with default values (e.g., 0)
    #data = data.fillna(0)
    
    # Convert all data to float
    #data = data.astype(float)
    
    #predictions = pipe.predict(data)
    #return pd.DataFrame(predictions, columns=["Name", "Depression"])
    #return pd.DataFrame({
        #'Name': data['Name'],
        #'Depression': predictions
    #})

def infer(inputs):
    # Create DataFrame from inputs
    data = pd.DataFrame(inputs, columns=headers)
    
    # Create a copy of the input DataFrame to preserve original data
    prediction_data = data.copy()
    
    # Add missing columns with default values before any column-based indexing,
    # so that columns listed in config.json but absent from the input do not
    # raise a KeyError below
    for col in all_headers:
        if col not in prediction_data.columns:
            prediction_data[col] = 0

    # Replace empty strings with NaN for numeric columns only
    numeric_columns = [col for col in all_headers if col != 'Name']
    prediction_data[numeric_columns] = prediction_data[numeric_columns].replace('', np.nan)
    
    # Ensure the order of columns matches the training data
    prediction_data = prediction_data[all_headers]
    
    # Fill NaN values in numeric columns only
    prediction_data[numeric_columns] = prediction_data[numeric_columns].fillna(0)
    
    # Convert numeric columns to float
    prediction_data[numeric_columns] = prediction_data[numeric_columns].astype(float)
    
    # Make predictions
    predictions = pipe.predict(prediction_data)
    
    # Create output DataFrame using original names
    return pd.DataFrame({
        'Name': data['Name'],
        'Depression': predictions
    })
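
# Quick local sanity check (optional): call infer() directly with the first rows of
# the dataset; the exact values depend on the dataset contents and config.json columns.
# sample = df[headers].head(2).values.tolist()
# print(infer(sample))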




gr.Interface(
    fn=infer,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=[df[headers].head(3).values.tolist()],
    cache_examples=False
).launch(debug=True)