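# NOTE: the lines below appear to be web file-viewer residue, not program text;
# they are kept as comments so the module parses.
#   96abhishekarora's picture
#   Create app.py
#   25f2580 verified
#   raw
#   history blame
#   2.29 kB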
import streamlit as st
import pandas as pd
import linktransformer as lt
def convert_df_to_csv(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to UTF-8 encoded CSV bytes for download.

    Args:
        df: The DataFrame to serialize.

    Returns:
        The text produced by ``df.to_csv()`` (called with default
        arguments, so the index is written as the first column),
        encoded as UTF-8.
    """
    return df.to_csv().encode("utf-8")
# Render the app's title.
st.title('DataFrame Merger using LinkTransformer')
def load_dataframe(upload, path):
    """Load a CSV into a DataFrame from an uploaded file or from a path.

    The uploaded file takes precedence; ``path`` is only consulted when no
    file was uploaded.

    Args:
        upload: A file-like object readable by ``pd.read_csv``, or ``None``
            when nothing was uploaded.
        path: Location passed to ``pd.read_csv``. ``None``, an empty string
            or a whitespace-only string all mean "no path given"; leading
            and trailing whitespace on a string path is stripped.

    Returns:
        The parsed DataFrame, or ``None`` when neither source was supplied.

    Raises:
        Whatever ``pd.read_csv`` raises for an unreadable or malformed
        source; errors are not caught here.
    """
    if upload is not None:
        return pd.read_csv(upload)

    # Text typed into an input box often carries stray spaces; a blank or
    # whitespace-only value should not be handed to read_csv as a path.
    if isinstance(path, str):
        path = path.strip()

    # NOTE(review): the value is passed to pd.read_csv unchanged, so whoever
    # supplies it decides which location is read - confirm this is acceptable
    # where the app is deployed.
    if path:
        return pd.read_csv(path)
    return None
# Input widgets: each DataFrame can be supplied either as an uploaded CSV or
# as a path typed into the text box. Both values are handed to
# load_dataframe, which decides which one to read.

# Options for DataFrame 1
df1_upload = st.file_uploader("Upload DataFrame 1 (CSV)", type=['csv'], key='df1_upload')
df1_path = st.text_input("...or enter path for DataFrame 1 (CSV)", key='df1_path')

# Options for DataFrame 2
df2_upload = st.file_uploader("Upload DataFrame 2 (CSV)", type=['csv'], key='df2_upload')
df2_path = st.text_input("...or enter path for DataFrame 2 (CSV)", key='df2_path')

# Load the DataFrames; either may be None when no source has been given yet.
df1 = load_dataframe(df1_upload, df1_path)
df2 = load_dataframe(df2_upload, df2_path)

# Show the first rows of whichever frames are available.
if df1 is not None:
    st.write("DataFrame 1 Preview:")
    st.dataframe(df1.head())

if df2 is not None:
    st.write("DataFrame 2 Preview:")
    st.dataframe(df2.head())
# Model selection: the entered text is passed to lt.LinkTransformer on merge.
model_path = st.text_input("Model path (HuggingFace or local)", value="all-MiniLM-L6-v2")

# Column selection and merging are only offered once both frames are loaded
# and non-empty. The `is not None` checks must come first: df1/df2 are None
# until the user supplies data, and None has no `.empty` attribute.
if df1 is not None and df2 is not None and not df1.empty and not df2.empty:
    columns_df1 = df1.columns.tolist()
    columns_df2 = df2.columns.tolist()

    # Pre-select the first column of each frame as the match key.
    selected_columns_df1 = st.multiselect("Select columns from DataFrame 1 to match on:", columns_df1, default=columns_df1[0])
    selected_columns_df2 = st.multiselect("Select columns from DataFrame 2 to match on:", columns_df2, default=columns_df2[0])

    # Perform merge
    if st.button("Merge DataFrames"):
        if not selected_columns_df1 or not selected_columns_df2:
            # The user can clear a multiselect; merging with no keys is meaningless.
            st.warning("Select at least one column from each DataFrame to match on.")
        else:
            model = lt.LinkTransformer(model_path)
            # df2 is the left frame and df1 the right frame, so the key lists
            # must follow the same order: left_on names columns of df2 and
            # right_on names columns of df1.
            df_lm_matched = lt.merge(
                df2,
                df1,
                merge_type='1:m',
                on=None,
                model=model,
                left_on=selected_columns_df2,
                right_on=selected_columns_df1,
            )
            st.write("Merged DataFrame Preview:")
            st.dataframe(df_lm_matched.head())

            # Download button for merged DataFrame
            csv = convert_df_to_csv(df_lm_matched)
            st.download_button(
                label="Download merged DataFrame as CSV",
                data=csv,
                file_name='merged_dataframe.csv',
                mime='text/csv',
            )