Spaces:

Koshti10
/

TestLLMCalc

Sleeping

File size: 3,892 Bytes

ef818ff

# Utility functions for filtering the dataframe

import pandas as pd

def filter_cols(df):
    """Return a new dataframe holding only the display columns of *df*.

    The columns are selected by name, in the fixed order listed below;
    every other column of *df* is dropped. A ``KeyError`` is raised by
    pandas if any of the listed columns is missing.
    """
    display_columns = [
        'Model Name',
        'Clemscore',
        'Input $/1M tokens',
        'Output $/1M tokens',
        'Latency (s)',
        'Context Size (k)',
        'Parameters (B)',
        'Release Date',
        'License',
    ]
    return df[display_columns]


def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license ):
    """Apply the user-selected filters to the model dataframe.

    The filters are applied in sequence; once the dataframe becomes empty
    the remaining row filters are skipped. The result is reduced to the
    display columns via ``filter_cols`` and sorted by 'Clemscore',
    highest first.

    NOTE: the function name and the ``license`` parameter shadow Python
    builtins; both are kept unchanged for existing callers.

    Args:
        df: Dataframe to filter. Besides the display columns selected by
            ``filter_cols`` it must provide 'Languages', 'Open Weight',
            'Image', 'Multiple Image', 'Audio', 'Video', 'License Name'
            and 'Temp Date'.
        language_list: Languages a model must support; a row is kept only
            if every entry is contained (``in``) in its 'Languages' value.
            An empty list keeps every row.
        parameters: ``(min, max)`` bounds on 'Parameters (B)'. Applied to
            open weight models only. The upper bound is ignored when it is
            at least the largest open weight model size in *df*.
        input_price: Inclusive ``(min, max)`` bounds on 'Input $/1M tokens'.
        output_price: Inclusive ``(min, max)`` bounds on
            'Output $/1M tokens'.
        multimodal: Selected modality labels ("Image", "Multi-Image",
            "Audio", "Video"); each selected label requires the matching
            flag column to be True.
        context: Currently unused (the context size filter is disabled);
            kept so the call signature stays stable.
        open_weight: Selection containing "Open" and/or "Commercial".
            Selecting neither yields an empty result.
        start: Start of the release window (inclusive); anything accepted
            by ``pd.to_datetime``.
        end: End of the release window (inclusive); anything accepted by
            ``pd.to_datetime``.
        license: Selected license names; a row is kept if any of them is
            contained (``in``) in its 'License Name' value. An empty
            selection drops every row.

    Returns:
        The filtered dataframe, restricted to the display columns and
        sorted by 'Clemscore' in descending order.
    """
    # Maps each modality label of the UI selection to its flag column.
    modality_columns = {
        "Image": 'Image',
        "Multi-Image": 'Multiple Image',
        "Audio": 'Audio',
        "Video": 'Video',
    }

    if not df.empty:
        # NOTE(review): assumes every 'Languages' cell supports ``in``
        # (e.g. a list or a string) - confirm against the data loader.
        df = df[df['Languages'].apply(
            lambda supported: all(lang in supported for lang in language_list)
        )]

    if not df.empty:
        open_models = df[df['Open Weight'].eq(True)]
        commercial_models = df[df['Open Weight'].eq(False)]

        # Only open weight models are filtered by size; commercial models
        # pass through untouched.
        if not open_models.empty:
            sizes = open_models['Parameters (B)']
            keep = sizes >= parameters[0]
            if parameters[1] >= sizes.max():
                # Upper bound covers the largest model: apply no upper limit.
                pass
            else:
                keep = keep & (sizes <= parameters[1])
            df = pd.concat([open_models[keep], commercial_models])

    if not df.empty:
        df = df[df['Input $/1M tokens'].between(input_price[0], input_price[1])]

    if not df.empty:
        df = df[df['Output $/1M tokens'].between(output_price[0], output_price[1])]

    if not df.empty:
        for label, column in modality_columns.items():
            if label in multimodal:
                df = df[df[column].eq(True)]

    # NOTE: filtering on 'Context Size (k)' via ``context`` is currently
    # disabled, so ``context`` has no effect on the result.

    if not df.empty:
        want_open = "Open" in open_weight
        want_commercial = "Commercial" in open_weight
        if want_open and not want_commercial:
            df = df[df['Open Weight'].eq(True)]
        elif want_commercial and not want_open:
            df = df[df['Open Weight'].eq(False)]
        elif not want_open and not want_commercial:
            # Nothing selected: empty result that keeps columns and dtypes.
            df = df.iloc[0:0]

    if not df.empty:
        # NOTE(review): assumes every 'License Name' cell supports ``in``
        # - confirm against the data loader.
        df = df[df['License Name'].apply(
            lambda name: any(lic in name for lic in license)
        )]

    # Release window. Timestamps are compared directly instead of writing
    # an integer epoch column onto the filtered slice: that avoided neither
    # pandas' chained-assignment warning nor a dependence on the datetime
    # resolution and platform integer width. ``utc=True`` treats naive
    # values as UTC, matching the previous epoch-based comparison.
    window_start = pd.to_datetime(start, utc=True)
    window_end = pd.to_datetime(end, utc=True)
    if not df.empty:
        released = pd.to_datetime(df['Temp Date'], utc=True)
        df = df[(released >= window_start) & (released <= window_end)]

    df = filter_cols(df)
    return df.sort_values(by='Clemscore', ascending=False)