Spaces:

Koshti10
/

TestLLMCalc

Sleeping

App Files Files Community

TestLLMCalc / src /filter_utils.py

Koshti10

Upload 11 files

ef818ff verified 27 days ago

raw

history blame

3.89 kB

	# Utility functions for filtering the dataframe

	import pandas as pd

	def filter_cols(df):
	    """Return only the columns that are shown in the results table.

	    Args:
	        df: DataFrame containing at least the display columns listed below.

	    Returns:
	        A DataFrame restricted to the display columns, in display order.
	    """
	    display_columns = [
	        'Model Name',
	        'Clemscore',
	        'Input $/1M tokens',
	        'Output $/1M tokens',
	        'Latency (s)',
	        'Context Size (k)',
	        'Parameters (B)',
	        'Release Date',
	        'License',
	    ]
	    return df[display_columns]


	def _apply_parameter_range(open_models, parameters):
	    """Return the open-weight rows whose 'Parameters (B)' lies in ``parameters``.

	    Args:
	        open_models: DataFrame holding only open-weight models (may be empty).
	        parameters: Indexable pair ``(lower, upper)`` of parameter counts.

	    Returns:
	        The rows of ``open_models`` that satisfy the range.
	    """
	    if open_models.empty:
	        # Nothing to restrict; return the empty frame so the caller can still
	        # concatenate it with the commercial models.
	        return open_models

	    sizes = open_models['Parameters (B)']
	    within = sizes >= parameters[0]
	    # An upper bound at or above the largest open-weight model is treated as
	    # "no upper limit", so it is only enforced when it actually cuts in.
	    if not parameters[1] >= sizes.max():
	        within &= sizes <= parameters[1]
	    return open_models[within]


	def filter(df, language_list, parameters, input_price, output_price, multimodal,
	           context, open_weight, start, end, license):
	    """Filter the model table by the user's selections and sort by Clemscore.

	    Each filter step is skipped once the frame is empty. The parameter-size
	    range is applied to open-weight models only; commercial models always
	    pass that step.

	    Args:
	        df: DataFrame with the display columns plus 'Languages',
	            'Open Weight', 'Image', 'Multiple Image', 'Audio', 'Video',
	            'License Name' and 'Temp Date'.
	        language_list: Languages that must all be contained in a row's
	            'Languages' value.
	        parameters: ``(lower, upper)`` bounds for 'Parameters (B)'.
	        input_price: ``(lower, upper)`` inclusive bounds for
	            'Input $/1M tokens'.
	        output_price: ``(lower, upper)`` inclusive bounds for
	            'Output $/1M tokens'.
	        multimodal: Selected modality labels ("Image", "Multi-Image",
	            "Audio", "Video"); each selected label requires the matching
	            column to be True.
	        context: Currently unused; context-size filtering is disabled.
	        open_weight: Selected model kinds ("Open", "Commercial"). Selecting
	            neither yields an empty result.
	        start: Earliest release date, in any form ``pd.to_datetime`` accepts.
	        end: Latest release date, in any form ``pd.to_datetime`` accepts.
	        license: License names; a row is kept if any of them is contained
	            in its 'License Name' value.

	    Returns:
	        The filtered DataFrame, reduced to the display columns and sorted by
	        'Clemscore' in descending order.
	    """
	    if not df.empty:
	        df = df[df['Languages'].apply(
	            lambda langs: all(lang in langs for lang in language_list)
	        )]

	    if not df.empty:
	        # Explicit comparisons so rows that are neither True nor False drop out.
	        open_models = df[df['Open Weight'] == True]
	        commercial_models = df[df['Open Weight'] == False]
	        # The helper also covers the "no open-weight models" case, which
	        # previously left the filtered frame unassigned before the concat.
	        df = pd.concat([
	            _apply_parameter_range(open_models, parameters),
	            commercial_models,
	        ])

	    price_limits = (
	        ('Input $/1M tokens', input_price),
	        ('Output $/1M tokens', output_price),
	    )
	    for column, limits in price_limits:
	        if not df.empty:
	            df = df[(df[column] >= limits[0]) & (df[column] <= limits[1])]

	    # NOTE(review): debug dumps to stdout are kept to preserve existing output.
	    print("Price")
	    print(df)

	    if not df.empty:
	        modality_columns = (
	            ("Image", 'Image'),
	            ("Multi-Image", 'Multiple Image'),
	            ("Audio", 'Audio'),
	            ("Video", 'Video'),
	        )
	        for label, column in modality_columns:
	            if label in multimodal:
	                df = df[df[column] == True]

	    print("Modality")
	    print(df)

	    if not df.empty:
	        wants_open = "Open" in open_weight
	        wants_commercial = "Commercial" in open_weight
	        if wants_open and not wants_commercial:
	            df = df[df['Open Weight'] == True]
	        elif wants_commercial and not wants_open:
	            df = df[df['Open Weight'] == False]
	        elif not wants_open and not wants_commercial:
	            # Nothing selected: return an empty frame with the same columns.
	            df = pd.DataFrame(columns=df.columns)

	    if not df.empty:
	        df = df[df['License Name'].apply(
	            lambda name: any(lic in name for lic in license)
	        )]

	    # Compare dates as whole seconds since the epoch.
	    start_seconds = int(pd.to_datetime(start).timestamp())
	    end_seconds = int(pd.to_datetime(end).timestamp())

	    if not df.empty:
	        # Keep the converted dates in a local Series instead of writing them
	        # back into a filtered slice (avoids SettingWithCopyWarning).
	        # NOTE(review): the division assumes nanosecond resolution - confirm
	        # against the pandas version in use.
	        release_seconds = pd.to_datetime(df['Temp Date']).astype('int64') // 10**9
	        df = df[(release_seconds >= start_seconds) & (release_seconds <= end_seconds)]

	    df = filter_cols(df)
	    df = df.sort_values(by='Clemscore', ascending=False)

	    print(df)

	    return df