DrishtiSharma committed on
Commit
2eb7a40
·
verified ·
1 Parent(s): 1a7207d

Delete mylab/ds_display_issue.py

Browse files
Files changed (1) hide show
  1. mylab/ds_display_issue.py +0 -193
mylab/ds_display_issue.py DELETED
@@ -1,193 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import sqlite3
4
- import os
5
- import json
6
- from pathlib import Path
7
- import plotly.express as px
8
- from datetime import datetime, timezone
9
- from crewai import Agent, Crew, Process, Task
10
- from crewai.tools import tool
11
- from langchain_groq import ChatGroq
12
- from langchain_openai import ChatOpenAI
13
- from langchain.schema.output import LLMResult
14
- from langchain_community.tools.sql_database.tool import (
15
- InfoSQLDatabaseTool,
16
- ListSQLDatabaseTool,
17
- QuerySQLCheckerTool,
18
- QuerySQLDataBaseTool,
19
- )
20
- from langchain_community.utilities.sql_database import SQLDatabase
21
- from datasets import load_dataset
22
- import tempfile
23
-
24
- st.title("SQL-RAG Using CrewAI 🚀")
25
- st.write("Analyze datasets using natural language queries powered by SQL and CrewAI.")
26
-
27
- # Initialize LLM
28
- llm = None
29
-
30
- # Model Selection
31
- model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
32
-
33
- # API Key Validation and LLM Initialization
34
- groq_api_key = os.getenv("GROQ_API_KEY")
35
- openai_api_key = os.getenv("OPENAI_API_KEY")
36
-
37
- if model_choice == "llama-3.3-70b":
38
- if not groq_api_key:
39
- st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
40
- llm = None
41
- else:
42
- llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
43
- elif model_choice == "GPT-4o":
44
- if not openai_api_key:
45
- st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
46
- llm = None
47
- else:
48
- llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
49
-
50
- # Initialize session state for data persistence
51
- if "df" not in st.session_state:
52
- st.session_state.df = None
53
-
54
- # Dataset Input
55
- input_option = st.radio("Select Dataset Input:", ["Use Hugging Face Dataset", "Upload CSV File"])
56
-
57
- if input_option == "Use Hugging Face Dataset":
58
- dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="Einstellung/demo-salaries")
59
- if st.button("Load Dataset"):
60
- try:
61
- with st.spinner("Loading dataset..."):
62
- dataset = load_dataset(dataset_name, split="train")
63
- st.session_state.df = pd.DataFrame(dataset)
64
- st.success(f"Dataset '{dataset_name}' loaded successfully!")
65
- except Exception as e:
66
- st.error(f"Error: {e}")
67
-
68
- elif input_option == "Upload CSV File":
69
- uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
70
- if uploaded_file:
71
- try:
72
- st.session_state.df = pd.read_csv(uploaded_file)
73
- st.success("File uploaded successfully!")
74
- except Exception as e:
75
- st.error(f"Error loading file: {e}")
76
-
77
- # Display Dataset Preview
78
- if st.session_state.df is not None:
79
- st.subheader("📂 Dataset Preview")
80
- st.dataframe(st.session_state.df.head())
81
-
82
-
83
- # SQL-RAG Analysis
84
- if st.session_state.df is not None:
85
- temp_dir = tempfile.TemporaryDirectory()
86
- db_path = os.path.join(temp_dir.name, "data.db")
87
- connection = sqlite3.connect(db_path)
88
- st.session_state.df.to_sql("salaries", connection, if_exists="replace", index=False)
89
- db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
90
-
91
- @tool("list_tables")
92
- def list_tables() -> str:
93
- """List all tables in the database."""
94
- return ListSQLDatabaseTool(db=db).invoke("")
95
-
96
- @tool("tables_schema")
97
- def tables_schema(tables: str) -> str:
98
- """Get the schema and sample rows for the specified tables."""
99
- return InfoSQLDatabaseTool(db=db).invoke(tables)
100
-
101
- @tool("execute_sql")
102
- def execute_sql(sql_query: str) -> str:
103
- """Execute a SQL query against the database and return the results."""
104
- return QuerySQLDataBaseTool(db=db).invoke(sql_query)
105
-
106
- @tool("check_sql")
107
- def check_sql(sql_query: str) -> str:
108
- """Validate the SQL query syntax and structure before execution."""
109
- return QuerySQLCheckerTool(db=db, llm=llm).invoke({"query": sql_query})
110
-
111
- sql_dev = Agent(
112
- role="Senior Database Developer",
113
- goal="Extract data using optimized SQL queries.",
114
- backstory="An expert in writing optimized SQL queries for complex databases.",
115
- llm=llm,
116
- tools=[list_tables, tables_schema, execute_sql, check_sql],
117
- )
118
-
119
- data_analyst = Agent(
120
- role="Senior Data Analyst",
121
- goal="Analyze the data and produce insights.",
122
- backstory="A seasoned analyst who identifies trends and patterns in datasets.",
123
- llm=llm,
124
- )
125
-
126
- report_writer = Agent(
127
- role="Technical Report Writer",
128
- goal="Summarize the insights into a clear report.",
129
- backstory="An expert in summarizing data insights into readable reports.",
130
- llm=llm,
131
- )
132
-
133
- extract_data = Task(
134
- description="Extract data based on the query: {query}.",
135
- expected_output="Database results matching the query.",
136
- agent=sql_dev,
137
- )
138
-
139
- analyze_data = Task(
140
- description="Analyze the extracted data for query: {query}.",
141
- expected_output="Analysis text summarizing findings.",
142
- agent=data_analyst,
143
- context=[extract_data],
144
- )
145
-
146
- write_report = Task(
147
- description="Summarize the analysis into an executive report.",
148
- expected_output="Markdown report of insights.",
149
- agent=report_writer,
150
- context=[analyze_data],
151
- )
152
-
153
- crew = Crew(
154
- agents=[sql_dev, data_analyst, report_writer],
155
- tasks=[extract_data, analyze_data, write_report],
156
- process=Process.sequential,
157
- verbose=True,
158
- )
159
-
160
- # UI: Tabs for Query Results and General Insights
161
- tab1, tab2 = st.tabs(["🔍 Query Insights + Viz", "📊 Full Data Viz"])
162
-
163
- with tab1:
164
- query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.")
165
- if st.button("Submit Query"):
166
- with st.spinner("Processing query..."):
167
- inputs = {"query": query}
168
- result = crew.kickoff(inputs=inputs)
169
- st.markdown("### Analysis Report:")
170
- st.markdown(result)
171
-
172
- # Query-Specific Visualization
173
- if "salary" in query.lower():
174
- fig = px.box(st.session_state.df, x="job_title", y="salary_in_usd", title="Salary Distribution by Job Title")
175
- st.plotly_chart(fig)
176
-
177
- with tab2:
178
- st.subheader("📊 Comprehensive Data Visualizations")
179
-
180
- fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency")
181
- st.plotly_chart(fig1)
182
-
183
- fig2 = px.bar(st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
184
- x="experience_level", y="salary_in_usd", title="Average Salary by Experience Level")
185
- st.plotly_chart(fig2)
186
-
187
- temp_dir.cleanup()
188
- else:
189
- st.info("Please load a dataset to proceed.")
190
-
191
- with st.sidebar:
192
- st.header("📚 Reference:")
193
- st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")