Spaces:
Sleeping
Sleeping
# Utility functions for filtering the dataframe | |
import pandas as pd | |
def filter_cols(df):
    """Return ``df`` reduced to a fixed, ordered set of columns.

    Every listed column must be present in ``df``; pandas raises ``KeyError``
    otherwise. Columns of ``df`` that are not listed are dropped from the
    result. The input frame itself is not modified.
    """
    kept_columns = (
        'Model Name',
        'Clemscore',
        'Input $/1M tokens',
        'Output $/1M tokens',
        'Latency (s)',
        'Context Size (k)',
        'Parameters (B)',
        'Release Date',
        'License',
    )
    # Selecting with a list of labels fixes both the column set and its order.
    projected = df[list(kept_columns)]
    return projected
# Maps each modality label accepted in ``multimodal`` to the flag column
# that must be True for a row to be kept.
_MODALITY_COLUMNS = {
    "Image": "Image",
    "Multi-Image": "Multiple Image",
    "Audio": "Audio",
    "Video": "Video",
}


def _filter_open_weight_parameters(df, parameters):
    """Apply the inclusive parameter-count range to open-weight rows only."""
    open_rows = df[df['Open Weight'].eq(True)]
    if open_rows.empty:
        # Nothing to restrict: leave the frame exactly as it was.
        return df
    closed_rows = df[df['Open Weight'].eq(False)]
    # An upper bound at or above the largest open-weight model keeps every
    # model above the lower bound, so one inclusive range check is enough.
    in_range = open_rows['Parameters (B)'].between(parameters[0], parameters[1])
    # Rows that are not open weight pass through without a parameter check.
    return pd.concat([open_rows[in_range], closed_rows])


def _filter_modalities(df, multimodal):
    """Keep rows whose flag column is True for every requested modality."""
    for label, column in _MODALITY_COLUMNS.items():
        if label in multimodal:
            df = df[df[column].eq(True)]
    return df


def _filter_weight_type(df, open_weight):
    """Keep open-weight and/or commercial rows according to ``open_weight``."""
    wants_open = "Open" in open_weight
    wants_commercial = "Commercial" in open_weight
    if wants_open and wants_commercial:
        return df
    if wants_open:
        return df[df['Open Weight'].eq(True)]
    if wants_commercial:
        return df[df['Open Weight'].eq(False)]
    # Neither kind selected: empty result that keeps the columns and dtypes.
    return df.iloc[0:0]


def _epoch_seconds(dates):
    """Convert a date-like Series to whole seconds since the Unix epoch.

    Naive values are interpreted as UTC. The subtraction/floor-division form
    does not depend on the datetime resolution of the column, and missing
    dates become NaN instead of raising.
    """
    stamps = pd.to_datetime(dates, utc=True)
    epoch = pd.Timestamp("1970-01-01", tz="UTC")
    return (stamps - epoch) // pd.Timedelta(seconds=1)


def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license):
    """Filter ``df`` by the given criteria and return it sorted by Clemscore.

    Filters are applied in sequence; the per-row ``apply`` filters are skipped
    once the frame is empty, because an empty ``apply`` result is not a usable
    boolean mask. The input frame is not modified.

    Args:
        df: DataFrame with the columns read below ('Languages', 'Open Weight',
            'Parameters (B)', the two price columns, the modality flag
            columns, 'License Name', 'Temp Date') plus the columns selected
            by ``filter_cols``.
        language_list: Languages that must all be contained in a row's
            'Languages' value (checked with ``in``).
        parameters: ``(low, high)`` inclusive range for 'Parameters (B)'.
            Applied to open-weight rows only; other rows are kept as is.
        input_price: ``(low, high)`` inclusive range for 'Input $/1M tokens'.
        output_price: ``(low, high)`` inclusive range for 'Output $/1M tokens'.
        multimodal: Collection of modality labels ("Image", "Multi-Image",
            "Audio", "Video"); each one present requires its flag column to
            be True.
        context: Accepted for interface compatibility; no filter uses it.
        open_weight: Collection that may contain "Open" and/or "Commercial".
            If it contains neither, the result is empty.
        start: Earliest accepted date (inclusive), anything ``pd.to_datetime``
            accepts.
        end: Latest accepted date (inclusive), same format as ``start``.
        license: Collection of license names; a row is kept if any of them is
            contained in its 'License Name' value (checked with ``in``).

    Returns:
        DataFrame limited to the ``filter_cols`` columns, sorted by
        'Clemscore' in descending order.
    """
    if not df.empty:
        df = df[df['Languages'].apply(lambda x: all(lang in x for lang in language_list))]

    if not df.empty:
        df = _filter_open_weight_parameters(df, parameters)

    if not df.empty:
        df = df[df['Input $/1M tokens'].between(input_price[0], input_price[1])]

    if not df.empty:
        df = df[df['Output $/1M tokens'].between(output_price[0], output_price[1])]

    print("Price")
    print(df)

    if not df.empty:
        df = _filter_modalities(df, multimodal)

    print("Modality")
    print(df)

    if not df.empty:
        df = _filter_weight_type(df, open_weight)

    if not df.empty:
        df = df[df['License Name'].apply(lambda x: any(lic in x for lic in license))]

    # Compare dates as whole seconds since the epoch. The bounds are converted
    # even when the frame is already empty so invalid bounds still raise.
    start = int(pd.to_datetime(start).timestamp())
    end = int(pd.to_datetime(end).timestamp())

    if not df.empty:
        # Build the mask from a separate Series rather than writing a column
        # onto a filtered frame, which would raise SettingWithCopyWarning.
        released = _epoch_seconds(df['Temp Date'])
        df = df[(released >= start) & (released <= end)]

    df = filter_cols(df)
    df = df.sort_values(by='Clemscore', ascending=False)

    print(df)
    return df