"""
This is a Streamlit application that lets the user query a CSV file and
returns answers based on the data in that file.

It uses Streamlit to create a web application that uploads the CSV file and
queries it.

LangChain and the OpenAI API are used to generate the answers from the
data in the CSV file.

Credit: https://www.youtube.com/@alejandro_ao
"""

import os
import tempfile

import pandas as pd
import streamlit as st
from langchain.agents import create_csv_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI


# OpenAI chat model used by the CSV agent; also shown to the user in the UI.
_MODEL_NAME = "gpt-3.5-turbo-0613"

# Introductory help text. Each entry is rendered with its own st.markdown
# call, in order, so the page layout matches one call per paragraph.
_INTRO_MARKDOWN = (
    "# Data Analysis Helper",
    "This tool helps you analyze your CSV files. Please remember to "
    "**remove personal information** first, such as names, addresses, "
    "phone numbers, emails, etc.",
    "## How to use this tool",
    "1. Upload your CSV file",
    "2. Ask a question about your CSV file",
    "3. Wait for the answer to appear",
    "## Example questions",
    "1. What is the average age?",
    "2. What is the average income?",
    "3. What is the average age of people who live in London?",
    "Go to [this page](https://openai.com/pricing) to get an OpenAI API key.",
    "The API key takes the following form: sk****hx. NB: This is not a viable key.",
)


def main():
    """Render the Streamlit page and answer questions about an uploaded CSV.

    Flow of a single script run:
      1. Show the help text and ask for an OpenAI API key.
      2. Stop early (without terminating the server process) until a key
         has been entered.
      3. Accept one CSV upload, preview its first rows, and build a
         LangChain CSV agent on top of it.
      4. Send the user's question to the agent and display the answer.

    The function takes no arguments and returns nothing; all output goes
    through Streamlit widgets.
    """
    st.set_page_config("Data Analysis Helper \U0001F4CA")
    for paragraph in _INTRO_MARKDOWN:
        st.markdown(paragraph)

    openai_api_key = st.text_input("Enter your OpenAI API key", type="password")
    openai_api_key = (openai_api_key or "").strip()
    if not openai_api_key:
        # The field is empty on first page load, so this is the normal
        # starting state: tell the user in the page and end this script run
        # with st.stop() instead of calling exit().
        st.info("Please enter your OpenAI API key to continue.")
        st.stop()

    st.write(f"Using the {_MODEL_NAME} model from OpenAI")

    csv_file = st.file_uploader("Upload a CSV file", type="csv")
    if csv_file is None:
        return

    # Store the upload in a private temporary directory under a fixed name:
    # the client-supplied filename is never used to build a path, nothing in
    # the working directory can be overwritten, and the copy is removed when
    # the block exits (including when st.stop() or an error unwinds it).
    with tempfile.TemporaryDirectory() as tmp_dir:
        csv_path = os.path.join(tmp_dir, "upload.csv")
        with open(csv_path, "wb") as handle:
            handle.write(csv_file.getbuffer())
        st.write("CSV file uploaded: ", csv_file.name)

        st.write("Preview of the CSV file:")
        try:
            df = pd.read_csv(csv_path)
        except (
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
            UnicodeDecodeError,
        ) as err:
            st.error(f"Could not read the CSV file: {err}")
            st.stop()
        st.dataframe(df.head())

        # NOTE(review): per the LangChain docs the CSV/pandas agent answers
        # questions by executing model-generated Python against the data --
        # confirm this is acceptable for wherever the app is deployed.
        agent = create_csv_agent(
            # The key is handed to the client directly rather than written to
            # os.environ, which is process-wide and therefore shared between
            # every session served by this process.
            ChatOpenAI(
                temperature=0,
                model=_MODEL_NAME,
                openai_api_key=openai_api_key,
            ),
            csv_path,
            verbose=True,
            agent_type=AgentType.OPENAI_FUNCTIONS,
        )

        user_question = st.text_input("Ask a question \U0001F914 about your CSV: ")
        if user_question:
            with st.spinner(text="In progress..."):
                st.write(agent.run(user_question))


# Script entry point: build the page only when this file is executed directly
# (for example via `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    main()
|