""" This is a streamlit application that allows user to query multiple csv files and return the answer based on the text in the csv files. It uses streamlit to create a web application to load the csv files and query them. Langchain and OpenAI API are used to generate the answer based on the text in the csv files. Credit: https://www.youtube.com/@alejandro_ao 👊🏿 """ from langchain.agents import create_csv_agent from langchain.chat_models import ChatOpenAI from langchain.agents.agent_types import AgentType #from dotenv import load_dotenv import os import pandas as pd import streamlit as st def main(): # set the page title st.set_page_config("Data Analysis Helper \U0001F4CA") st.markdown("# Data Analysis Helper") st.markdown("This tool helps you analyze your CSV files. Please remember to **remove personal information** first, such as names, addresses, phone numbers, emails, etc.") st.markdown("## How to use this tool") st.markdown("1. Upload your CSV file") st.markdown("2. Ask a question about your CSV file") st.markdown("3. Wait for the answer to appear") st.markdown("## Example questions") st.markdown("1. What is the average age?") st.markdown("2. What is the average income?") st.markdown("3. What is the average age of people who live in London?") st.markdown("Go to [this page](https://openai.com/pricing) to get an OpenAI API key.") st.markdown("The API key takes the following form: sk****hx. NB: This is not a viable key.") # text input to ask for openai api key # then hide the input openai_api_key = st.text_input("Enter your OpenAI API key", type="password") # set this key as an environment variable os.environ["OPENAI_API_KEY"] = openai_api_key # load the api key from the .env file #load_dotenv() # inform the user that the api key is loaded if os.getenv("OPENAI_API_KEY") is None or os.getenv("OPENAI_API_KEY") == "": print("OPENAI_API_KEY is not set") exit(1) else: print("OPENAI_API_KEY is set") # inform user that the model being used is the turbo model st.write("Using the gpt-3.5-turbo-0613 model from OpenAI") # Upload the CSV file csv_file = st.file_uploader("Upload a CSV file", type="csv") # if the user has uploaded a csv file then save it to the current directory if csv_file is not None: with open(os.path.join(os.getcwd(), csv_file.name), "wb") as f: f.write(csv_file.getbuffer()) st.write("CSV file uploaded to: ", os.path.join(os.getcwd(), csv_file.name)) # see a preview of the csv file st.write("Preview of the CSV file:") # see a preview of the csv file df = pd.read_csv(os.path.join(os.getcwd(), csv_file.name)) st.dataframe(df.head()) # Display the first few rows of the DataFrame # create the agent agent = create_csv_agent( ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"), os.path.join(os.getcwd(), csv_file.name), verbose=True, agent_type=AgentType.OPENAI_FUNCTIONS, ) # ask the user for a question user_question = st.text_input("Ask a question \U0001F914 about your CSV: ") # if the user has asked a question then run the agent if user_question is not None and user_question != "": with st.spinner(text="In progress..."): st.write(agent.run(user_question)) if __name__ == "__main__": main()