Upload 23 files
- .gitattributes +35 -35
- .gitignore +4 -0
- README.md +57 -12
- app.py +73 -0
- aux_data/column_names.txt +10 -0
- aux_data/file_name.txt +1 -0
- aux_data/query.txt +12 -0
- aux_functions/__init__.py +0 -0
- aux_functions/__pycache__/__init__.cpython-312.pyc +0 -0
- aux_functions/__pycache__/add_data.cpython-312.pyc +0 -0
- aux_functions/__pycache__/aux_functions.cpython-312.pyc +0 -0
- aux_functions/__pycache__/chat_sql_function.cpython-312.pyc +0 -0
- aux_functions/__pycache__/create_tables.cpython-312.pyc +0 -0
- aux_functions/__pycache__/db_functions.cpython-312.pyc +0 -0
- aux_functions/add_data.py +72 -0
- aux_functions/aux_functions.py +36 -0
- aux_functions/chat_sql_function.py +39 -0
- aux_functions/create_tables.py +70 -0
- aux_functions/db.db +0 -0
- aux_functions/db_functions.py +100 -0
- data/salaries.csv +0 -0
- db.db +0 -0
- requirements.txt +80 -0
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,4 @@
+.env
+a1.py
+app.ipynb
+/myenv
README.md
CHANGED
@@ -1,12 +1,57 @@
-
-
-
-
-
-
-
-
-
-
-
-
+# SQL Chatbot for Easy Data Extraction
+
+This repository provides a user-friendly SQL chatbot that simplifies extracting information from your tabular files. It combines OpenAI's language models with LangChain's SQL-generation capabilities so you can run insightful queries through a natural-language interface.
+
+## Features
+
+- **Intuitive Chat Interface:** Interact with your data conversationally, using plain-English queries.
+- **Streamlit UI:** A clean web app for seamless interaction with the chatbot.
+- **SQLite Support:** SQLite, a lightweight embedded database engine, handles data storage.
+
+## Requirements
+
+- Python 3.x (https://www.python.org/downloads/)
+- OpenAI API Key (https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key)
+- LangChain API Key ([invalid URL removed]) (see the LangChain documentation for API key acquisition instructions)
+- `requirements.txt` (included)
+
+## Installation and Configuration
+
+```bash
+git clone https://github.com/paramrajyadav/sql_chat_bot.git
+cd sql_chat_bot
+pip install -r requirements.txt
+```
+
+Create a `.env` file in the project root and add these lines, replacing the placeholders with your actual keys:
+
+```bash
+OPENAI_API_KEY=your_openai_api_key
+LANGCHAIN_API_KEY=your_langchain_api_key
+```
+
+## Usage
+
+Run the application:
+
+```bash
+streamlit run app.py
+```
+
+Access the Streamlit app in your web browser (usually http://localhost:8501).
+
+Interact with the chatbot by typing natural-language queries in the input field; the chatbot translates them into SQL and retrieves the requested information from your table.
+
+## Sample Queries
+
+- "Show me all customers in California."
+- "What are the average order values per product category?"
+- "Find orders placed between January and March 2024."
+
+## Contributing
+
+We welcome contributions from the community! Here are some ways you can get involved:
+
+* **Bug reports:** If you find a bug, please open an issue on the GitHub repository and include clear steps to reproduce it.
+* **Feature requests:** If you have an idea for a new feature, please open an issue describing it in detail and explaining why it would be valuable.
+* **Pull requests:** If you have implemented a bug fix or feature, submit a pull request. Follow the contributing guidelines (if available) and test your changes thoroughly.
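To make the README's flow concrete: under the hood, the natural-language-to-SQL translation is handled by a LangChain SQL agent over the SQLite file, as in `aux_functions/chat_sql_function.py` below. A minimal sketch of that flow (assuming `db.db` already exists and `OPENAI_API_KEY` is set in the environment):

```python
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import create_sql_agent
from langchain_openai import ChatOpenAI

# Point the agent at the SQLite database the app builds from your upload
db = SQLDatabase.from_uri("sqlite:///db.db")
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# The agent inspects the schema, writes the SQL, runs it, and answers in English
agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)
result = agent_executor.invoke("What are the average order values per product category?")
print(result["output"])
```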
app.py
ADDED
@@ -0,0 +1,73 @@
+import streamlit as st
+import os
+
+from aux_functions.aux_functions import *
+from aux_functions.add_data import *
+from aux_functions.create_tables import *
+from aux_functions.db_functions import *
+from aux_functions.chat_sql_function import *
+
+project_dir = os.getcwd()
+
+st.set_page_config(layout="wide")
+
+# Define the header of the app
+st.markdown("# SQL Chatbot")
+
+# Initialize session state if not already done
+if 'selected_index' not in st.session_state:
+    st.session_state['selected_index'] = 0
+
+def reset_app():
+    clean_data_folder(project_dir)
+    st.session_state['selected_index'] = 0
+
+selected_index = st.session_state['selected_index']
+
+if st.button("Reset the App", key="reset_app"):
+    reset_app()
+
+cols = st.columns(3)
+
+# Create the menu buttons dynamically; clicking one jumps to that page
+for i, label in enumerate(["Home", "Create Table", "Query"]):
+    if cols[i].button(label, key=f"menu_button_{i}"):
+        selected_index = i
+        st.session_state['selected_index'] = selected_index
+
+if selected_index == 0:
+    st.markdown("# Upload your file")
+    handle_file_upload()
+
+elif selected_index == 1:
+    st.write("This is the display code for Create Table.")
+    built_table()
+    if st.button("Insert values in the database"):
+        file_name_from_file(project_dir)
+        drop_sqlite_database()
+        setup_database()
+        import_csv(project_dir)
+        st.markdown("# Proceed to the QA Part")
+
+elif selected_index == 2:
+    st.write("# Enter your Question.")
+    query = st.text_input("Question")
+    if st.button("Initiate the Chatbot"):
+        x = initiate_chat(query)
+        st.write(x)
+
+button_col1, button_col2 = st.columns([1, 1])
+
+if button_col1.button("Go to previous Menu", key="prev_menu"):
+    if selected_index == 0:
+        st.write("Try Reset Button")
+    else:
+        selected_index -= 1
+        st.session_state['selected_index'] = selected_index
+
+if button_col2.button("Go to next Menu", key="next_menu"):
+    if selected_index == 2:
+        st.write("Try Reset Button")
+    else:
+        selected_index += 1
+        st.session_state['selected_index'] = selected_index
aux_data/column_names.txt
ADDED
@@ -0,0 +1,10 @@
+experience_level
+employment_type
+job_title
+salary
+salary_currency
+salary_in_usd
+employee_residence
+remote_ratio
+company_location
+company_size
aux_data/file_name.txt
ADDED
@@ -0,0 +1 @@
+salaries
aux_data/query.txt
ADDED
@@ -0,0 +1,12 @@
+CREATE TABLE salaries (
+    experience_level VARCHAR(250),
+    employment_type VARCHAR(250),
+    job_title VARCHAR(250),
+    salary INT,
+    salary_currency VARCHAR(250),
+    salary_in_usd INT,
+    employee_residence VARCHAR(250),
+    remote_ratio INT,
+    company_location VARCHAR(250),
+    company_size VARCHAR(250)
+);
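This saved statement is what `setup_database()` in `aux_functions/db_functions.py` later executes; a minimal sketch of that round trip, using the paths from this repo:

```python
import sqlite3

# Read the CREATE TABLE statement that the "Schema is correct" button saved
with open("aux_data/query.txt") as f:
    create_table_query = f.read()

# Execute it against the app's SQLite database, db.db
connection = sqlite3.connect("db.db")
connection.execute(create_table_query)
connection.commit()
connection.close()
```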
aux_functions/__init__.py
ADDED
File without changes

aux_functions/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (172 Bytes)

aux_functions/__pycache__/add_data.cpython-312.pyc
ADDED
Binary file (3.58 kB)

aux_functions/__pycache__/aux_functions.cpython-312.pyc
ADDED
Binary file (1.6 kB)

aux_functions/__pycache__/chat_sql_function.cpython-312.pyc
ADDED
Binary file (1.53 kB)

aux_functions/__pycache__/create_tables.cpython-312.pyc
ADDED
Binary file (3.83 kB)

aux_functions/__pycache__/db_functions.cpython-312.pyc
ADDED
Binary file (5.14 kB)
aux_functions/add_data.py
ADDED
@@ -0,0 +1,72 @@
+import os
+import pandas as pd
+import streamlit as st
+
+def handle_file_upload(upload_dir='data', aux_data_folder='aux_data'):
+    """
+    Handles the file upload, storage, and extraction of column names.
+    Stores the file name and column names in auxiliary text files.
+
+    Parameters:
+        upload_dir (str): The directory to store uploaded files.
+        aux_data_folder (str): The directory for the text files holding the file name and column names.
+
+    Returns:
+        file_name (str): The name of the uploaded file.
+        column_names (list): The list of column names in the uploaded file.
+    """
+    # Create the directory if it doesn't exist
+    if not os.path.exists(upload_dir):
+        os.makedirs(upload_dir)
+
+    # File uploader
+    uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xlsx'])
+
+    if uploaded_file is not None:
+        # Store the file in the specified directory
+        file_name = uploaded_file.name
+        file_path = os.path.join(upload_dir, file_name)
+
+        # Remove previously uploaded files so only one dataset is active
+        for filename in os.listdir(upload_dir):
+            file_path_existing = os.path.join(upload_dir, filename)
+            if os.path.isfile(file_path_existing) and filename != file_name:
+                os.remove(file_path_existing)
+
+        with open(file_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+
+        st.success(f"File {file_name} saved successfully!")
+
+        # Read the file and get column names
+        if file_name.endswith('.csv'):
+            df = pd.read_csv(file_path)
+        else:
+            # .xlsx upload (was pd.read_xml, which cannot parse Excel files);
+            # pandas needs an engine such as openpyxl installed for this
+            df = pd.read_excel(file_path)
+
+        column_names = df.columns.tolist()
+
+        if not os.path.exists(aux_data_folder):
+            os.makedirs(aux_data_folder)
+
+        # Define the filenames for the text files
+        file_name_filename = os.path.join(aux_data_folder, "file_name.txt")
+        column_names_filename = os.path.join(aux_data_folder, "column_names.txt")
+
+        # Write the file name (without extension) to its text file
+        with open(file_name_filename, "w") as file_name_file:
+            file_name_without_extension = file_name.rsplit('.', 1)[0]
+            file_name_file.write(file_name_without_extension)
+
+        # Write the column names to their text file, one per line
+        with open(column_names_filename, "w") as column_names_file:
+            for col in column_names:
+                column_names_file.write(col + "\n")
+        st.markdown("## File upload successful. Click 'Go to next Menu' to proceed.")
+
+        return file_name, column_names
+    else:
+        st.info("Please upload a file")
+        return None, None
aux_functions/aux_functions.py
ADDED
@@ -0,0 +1,36 @@
+import os
+import shutil
+
+def clean_data_folder(proj_dir=None):
+    """Delete the 'data' folder inside proj_dir (defaults to the current working directory)."""
+    try:
+        if proj_dir is None:
+            proj_dir = os.getcwd()  # was a hard-coded, developer-specific Windows path
+
+        # Define the folder name to clean
+        folder_name = 'data'
+
+        # Construct the full path to the folder
+        folder_path = os.path.join(proj_dir, folder_name)
+
+        # Check if the folder exists
+        if os.path.exists(folder_path):
+            try:
+                # Delete the folder and its contents
+                shutil.rmtree(folder_path)
+                print(f"Successfully deleted the folder: {folder_path}")
+            except PermissionError:
+                print(f"Permission error while deleting contents of '{folder_name}'.")
+            except OSError as e:
+                print(f"Error: {folder_path} : {e.strerror}")
+            except Exception as e:
+                print(f"An error occurred: {e}")
+        else:
+            print(f"The folder '{folder_path}' does not exist.")
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Example usage; guarded so importing this module no longer deletes the data folder
+if __name__ == "__main__":
+    clean_data_folder()
aux_functions/chat_sql_function.py
ADDED
@@ -0,0 +1,39 @@
+import os
+from dotenv import load_dotenv
+from langchain_community.utilities import SQLDatabase
+from langchain_community.agent_toolkits import create_sql_agent
+from langchain_openai import ChatOpenAI
+
+def initiate_chat(query):
+    # Load environment variables from a .env file, if present
+    load_dotenv()
+
+    # Set environment variables for API keys
+    # os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+    # os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
+    # os.environ["LANGCHAIN_TRACING_V2"] = "true"
+
+    # Define the SQL database URI
+    db_uri = "sqlite:///db.db"
+
+    # Initialize the SQLDatabase object
+    db = SQLDatabase.from_uri(db_uri)
+
+    # Read the OpenAI key from the "open_api" secret; Hugging Face Spaces expose
+    # secrets as environment variables (huggingface_hub has no hf_secrets module,
+    # and notebook_login() is only for interactive notebooks)
+    api_key = os.getenv("open_api")
+
+    # Initialize the ChatOpenAI object with the desired model
+    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=api_key)
+
+    # Create the SQL agent
+    agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)
+
+    # Execute the query using the agent
+    try:
+        result = agent_executor.invoke(query)
+        return result
+    except Exception as e:
+        print(f"Error: {e}")
+        return "Error: " + str(e)
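For a quick sanity check outside Streamlit, `initiate_chat` can be called directly; a small usage sketch (the question is illustrative, and it assumes `db.db` exists and the OpenAI key is available in the environment):

```python
from aux_functions.chat_sql_function import initiate_chat

# AgentExecutor.invoke returns a dict; with create_sql_agent the answer
# is under the "output" key, next to the original "input".
result = initiate_chat("What is the average salary_in_usd?")
if isinstance(result, dict):
    print(result["output"])
else:
    print(result)  # on failure the function returns an "Error: ..." string
```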
aux_functions/create_tables.py
ADDED
@@ -0,0 +1,70 @@
+import streamlit as st
+import os
+import pandas as pd
+
+def read_aux_data():
+    """
+    Reads the file name and column names from separate files within the aux_data directory.
+
+    Returns:
+        column_names (list): The column names read from column_names.txt.
+        file_name (str): The file name read from file_name.txt.
+    """
+    column_names_filename = 'aux_data/column_names.txt'
+    file_name_filename = 'aux_data/file_name.txt'
+
+    # Read column names, one per line
+    with open(column_names_filename, 'r') as column_file:
+        column_names = [line.strip() for line in column_file.readlines()]
+
+    # Read file name
+    with open(file_name_filename, 'r') as file_file:
+        file_name = file_file.read().strip()
+
+    return column_names, file_name
+
+def built_table():
+    st.title("File Upload, Data Preview, and SQL Schema Creation")
+
+    column_names, file_name = read_aux_data()
+
+    st.write("## Load and Display Saved Column Data")
+    st.write(column_names)
+
+    st.write("## Load and Display Sample Data")
+    file_path = os.path.join('data', file_name)
+
+    if os.path.exists(f"{file_path}.csv"):
+        df = pd.read_csv(f"{file_path}.csv")
+        st.write(df.head(20))
+    else:
+        st.write("Sample data not found.")
+
+    st.write("## Create SQL Schema")
+    sql_schema = {}
+    data_types = ['VARCHAR(250)', 'INT', 'FLOAT', 'DATE', 'DOUBLE']
+
+    for col in column_names:
+        selectbox_key = f"{col}_selectbox"
+        data_type = st.selectbox(f"Select data type for column '{col}'", [''] + data_types, key=selectbox_key)
+
+        if data_type == '':
+            st.warning(f"Please select a data type for column '{col}'.")
+            st.stop()  # Halt the script until a data type is selected
+        sql_schema[col] = data_type
+
+    st.write("### Generated SQL Schema")
+    create_table_query = f"CREATE TABLE {file_name} (\n"
+    create_table_query += ",\n".join([f"    {col} {dtype}" for col, dtype in sql_schema.items()])
+    create_table_query += "\n);"
+
+    st.code(create_table_query, language='sql')
+
+    if st.button("Schema is correct"):
+        # Save the query for setup_database() to execute later
+        query_filename = 'aux_data/query.txt'
+        with open(query_filename, 'w') as query_file:
+            query_file.write(create_table_query)
+        st.success("Query has been saved.")
+
+    return None
aux_functions/db.db
ADDED
File without changes
aux_functions/db_functions.py
ADDED
@@ -0,0 +1,100 @@
+import csv
+import os
+import sqlite3
+
+def read_query_from_file(proj_dir=None):
+    """Read the saved CREATE TABLE statement from aux_data/query.txt."""
+    if proj_dir is None:
+        proj_dir = os.getcwd()  # was a hard-coded, developer-specific Windows path
+    file_path = 'aux_data/query.txt'
+    file_name = os.path.join(proj_dir, file_path)
+    try:
+        with open(file_name, 'r') as file:
+            create_table = file.read()
+        return create_table
+    except FileNotFoundError:
+        print(f"File {file_path} not found.")
+        return None
+
+def file_name_from_file(proj_dir=None):
+    """Read the saved table/file name from aux_data/file_name.txt."""
+    if proj_dir is None:
+        proj_dir = os.getcwd()
+    file_path = 'aux_data/file_name.txt'
+    file_name = os.path.join(proj_dir, file_path)
+    try:
+        with open(file_name, 'r') as file:
+            return file.read().strip()  # Remove leading/trailing whitespace
+    except FileNotFoundError:
+        print(f"File {file_path} not found.")
+        return None
+
+def drop_sqlite_database():
+    """Delete the SQLite database file db.db in the current directory, if it exists."""
+    db_path = 'db.db'
+    try:
+        if os.path.exists(db_path):
+            os.remove(db_path)
+            print(f"Database at {db_path} has been deleted.")
+        else:
+            print(f"No database file found at {db_path}.")
+    except Exception as e:
+        print(f"An error occurred while deleting the database: {e}")
+
+def setup_database():
+    """Create the table described in aux_data/query.txt inside db.db."""
+    create_table_query = read_query_from_file()
+    connection = sqlite3.connect('db.db')
+    cursor = connection.cursor()
+    cursor.execute(str(create_table_query))
+    connection.commit()
+    connection.close()
+
+def import_csv(proj_dir=None):
+    """Insert the uploaded CSV's rows into the table named after the file."""
+    if proj_dir is None:
+        proj_dir = os.getcwd()
+    connection = None  # so the finally block is safe if connect() fails
+    try:
+        connection = sqlite3.connect('db.db')
+        c = connection.cursor()
+
+        file = file_name_from_file(proj_dir)
+        print("File to be imported:", file)
+        folder_name = os.path.join(proj_dir, "data")
+
+        with open(f"{folder_name}/{file}.csv", 'r', encoding='utf-8') as f:
+            reader = csv.reader(f)
+            print("Opened CSV file")
+            next(reader)  # Skip the header row
+
+            # Dynamically determine the number of columns from the first data row
+            first_row = next(reader)
+            num_columns = len(first_row)
+            placeholders = ','.join(['?'] * num_columns)
+
+            # Rewind and skip the header again so the first data row is not lost
+            # (recreating a reader on the advanced handle would silently drop rows)
+            f.seek(0)
+            reader = csv.reader(f)
+            next(reader)
+
+            for row in reader:
+                c.execute(f"INSERT INTO {file} VALUES ({placeholders})", row)
+
+        connection.commit()
+        print("Data imported successfully")
+    except sqlite3.Error as e:
+        print("SQLite error:", e)
+    except Exception as e:
+        print("Error:", e)
+    finally:
+        if connection:
+            connection.close()
+            print("Database connection closed")
+
+if __name__ == "__main__":
+    print(file_name_from_file())
+    drop_sqlite_database()
+    setup_database()
+    import_csv()
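As an aside, the row-by-row INSERT loop above could be replaced with pandas' bulk loader; this is not what the repo does, just a common simplification sketched under the same file layout ("salaries" stands in for the name stored in aux_data/file_name.txt):

```python
import sqlite3
import pandas as pd

# Let pandas infer the column mapping and bulk-insert the uploaded CSV
df = pd.read_csv("data/salaries.csv")
with sqlite3.connect("db.db") as connection:
    df.to_sql("salaries", connection, if_exists="replace", index=False)
```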
data/salaries.csv
ADDED
The diff for this file is too large to render.
db.db
ADDED
Binary file (721 kB)
requirements.txt
ADDED
@@ -0,0 +1,80 @@
+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==23.2.0
+blinker==1.8.2
+cachetools==5.3.3
+certifi==2024.6.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+dataclasses-json==0.6.7
+distro==1.9.0
+filelock==3.15.1
+frozenlist==1.4.1
+fsspec==2024.6.0
+gitdb==4.0.11
+GitPython==3.1.43
+greenlet==3.0.3
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+huggingface-hub==0.23.4
+idna==3.7
+Jinja2==3.1.4
+jsonpatch==1.33
+jsonpointer==3.0.0
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+langchain==0.2.5
+langchain-community==0.2.5
+langchain-core==0.2.7
+langchain-openai==0.1.8
+langchain-text-splitters==0.2.1
+langsmith==0.1.77
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.3
+mdurl==0.1.2
+multidict==6.0.5
+mypy-extensions==1.0.0
+numpy==1.26.4
+openai==1.34.0
+orjson==3.10.5
+packaging==24.1
+pandas==2.2.2
+pillow==10.3.0
+protobuf==4.25.3
+pyarrow==16.1.0
+pydantic==2.7.4
+pydantic_core==2.18.4
+pydeck==0.9.1
+Pygments==2.18.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.18.1
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+SQLAlchemy==2.0.30
+streamlit==1.35.0
+tenacity==8.3.0
+tiktoken==0.7.0
+toml==0.10.2
+toolz==0.12.1
+tornado==6.4.1
+tqdm==4.66.4
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.1
+watchdog==4.0.1
+yarl==1.9.4