Spaces:

kaleidoscope-data
/

data-cleaning-llm

Runtime error

File size: 2,607 Bytes

"""Streamlit app for the current project, hosted on HuggingFace Spaces."""

import streamlit as st
from openai_chat_completion import OpenAIChatCompletions
from dataclean_hf import main
from util import json_to_dict #, join_dicts

# Page heading followed by a short disclaimer about the demo's maturity.
st.title("Kaleidoscope Data - Data Cleaning LLM App")

st.write(
    "This app is a demo of the LLM model for data cleaning. "
    "It is a work in progress and is not yet ready for production use."
)

# Free-text input, empty by default; the CSV upload widget is currently disabled.
text_input = st.text_input(label="Enter text", value="")
# csv_file = st.file_uploader("Upload CSV", type=['csv'])

# Button that runs the data cleaning API on the entered text via the
# OpenAIChatCompletions class from openai_chat_completion.py.
if st.button("Run Data Cleaning API"):

    # Only call the API when there is real content: whitespace-only input is
    # treated the same as empty input so no request is sent for it.
    if text_input.strip():

        MODEL = "gpt-4"  # "gpt-3.5-turbo"

        # Load the system message. The second path is a fallback one directory
        # up (presumably for runs started from a subdirectory -- confirm).
        # If neither file exists, the FileNotFoundError propagates.
        try:
            with open('prompts/gpt4-system-message3.txt', 'r', encoding='utf8') as f:
                sys_mes = f.read()
        except FileNotFoundError:
            with open('../prompts/gpt4-system-message3.txt', 'r', encoding='utf8') as f:
                sys_mes = f.read()

        # Instantiate the chat wrapper and request a completion for the
        # text exactly as the user entered it.
        chat = OpenAIChatCompletions(model=MODEL, system_message=sys_mes)
        response = chat.openai_chat_completion(text_input, n_shot=None)

        # Display the first choice's message content, converted by json_to_dict.
        response_content = response['choices'][0]['message']['content']
        st.write(json_to_dict(response_content))

    # TODO: CSV branch (currently disabled together with the file uploader):
    #   elif csv_file:
    #       output_df = main(csv_file)
    #       csv = output_df.to_csv().encode('utf-8')  # cache with @st.cache_data
    #       st.download_button(label="Download data as CSV", data=csv,
    #                          file_name='cleaned_df.csv', mime='text/csv')

    # Nothing usable was entered: ask the user for input.
    else:
        st.write("Please enter text or upload a CSV file.")