# Spaces:
# Runtime error
# Runtime error
import streamlit as st | |
import pandas as pd | |
import linktransformer as lt | |
# Serialize a DataFrame so it can be offered as a file download.
def convert_df_to_csv(df):
    """Return *df* rendered as CSV text and encoded to UTF-8 bytes.

    The frame's index is written as the first column, because ``to_csv`` is
    called with its default arguments.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")
# Page heading shown at the top of the app.
st.title("DataFrame Merger using LinkTransformer")
# Read a CSV from whichever input source the user supplied.
def load_dataframe(upload, path):
    """Load a DataFrame from an uploaded file or, failing that, a path.

    Args:
        upload: File-like object accepted by ``pd.read_csv``, or ``None``
            when nothing was uploaded. Takes priority over ``path``.
        path: Location string handed to ``pd.read_csv``; an empty string
            means "not provided".

    Returns:
        The parsed DataFrame, or ``None`` when neither source was given.
    """
    # An upload always wins over a typed path.
    if upload is not None:
        return pd.read_csv(upload)
    if path == "":
        return None
    return pd.read_csv(path)
# Options for DataFrame 1: either upload a CSV or type a path to one.
df1_upload = st.file_uploader("Upload DataFrame 1 (CSV)", type=['csv'], key='df1_upload')
df1_path = st.text_input("...or enter path for DataFrame 1 (CSV)", key='df1_path')

# Options for DataFrame 2: same two choices.
df2_upload = st.file_uploader("Upload DataFrame 2 (CSV)", type=['csv'], key='df2_upload')
df2_path = st.text_input("...or enter path for DataFrame 2 (CSV)", key='df2_path')

# Load and display the DataFrames. Either value is None until the user
# supplies an upload or a non-empty path for it.
df1 = load_dataframe(df1_upload, df1_path)
df2 = load_dataframe(df2_upload, df2_path)

if df1 is not None:
    st.write("DataFrame 1 Preview:")
    st.dataframe(df1.head())

if df2 is not None:
    st.write("DataFrame 2 Preview:")
    st.dataframe(df2.head())

# Model selection
model_path = st.text_input("Model path (HuggingFace or local)", value="all-MiniLM-L6-v2")

# Column selection and merge are only offered once BOTH frames are loaded and
# non-empty. The None checks must come first: on the initial page load neither
# frame exists yet, and calling `.empty` on None raises AttributeError.
both_loaded = df1 is not None and df2 is not None
if both_loaded and not df1.empty and not df2.empty:
    columns_df1 = df1.columns.tolist()
    columns_df2 = df2.columns.tolist()

    # Pre-select the first column of each frame as a one-element list.
    selected_columns_df1 = st.multiselect(
        "Select columns from DataFrame 1 to match on:",
        columns_df1,
        default=columns_df1[:1],
    )
    selected_columns_df2 = st.multiselect(
        "Select columns from DataFrame 2 to match on:",
        columns_df2,
        default=columns_df2[:1],
    )

    # Perform merge
    if st.button("Merge DataFrames"):
        if not selected_columns_df1 or not selected_columns_df2:
            # The user can clear a multiselect entirely; there is nothing to
            # match on in that case, so report it instead of merging.
            st.warning("Select at least one column from each DataFrame to match on.")
        else:
            model = lt.LinkTransformer(model_path)
            # df2 is passed as the left frame, so `left_on` must name df2's
            # columns and `right_on` df1's.
            # NOTE(review): frame order and merge_type='1:m' are kept from the
            # original call; confirm that df2-as-left is the intended direction.
            df_lm_matched = lt.merge(
                df2,
                df1,
                merge_type='1:m',
                on=None,
                model=model,
                left_on=selected_columns_df2,
                right_on=selected_columns_df1,
            )
            st.write("Merged DataFrame Preview:")
            st.dataframe(df_lm_matched.head())

            # Download button for merged DataFrame
            csv = convert_df_to_csv(df_lm_matched)
            st.download_button(
                label="Download merged DataFrame as CSV",
                data=csv,
                file_name='merged_dataframe.csv',
                mime='text/csv',
            )