DrishtiSharma committed on
Commit
def49fe
·
verified ·
1 Parent(s): 186f364

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -24
app.py CHANGED
@@ -30,7 +30,6 @@ llm = None
30
  # Model Selection
31
  model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
32
 
33
-
34
  # API Key Validation and LLM Initialization
35
  groq_api_key = os.getenv("GROQ_API_KEY")
36
  openai_api_key = os.getenv("OPENAI_API_KEY")
@@ -51,9 +50,12 @@ elif model_choice == "GPT-4o":
51
  # Initialize session state for data persistence
52
  if "df" not in st.session_state:
53
  st.session_state.df = None
 
 
54
 
55
  # Dataset Input
56
  input_option = st.radio("Select Dataset Input:", ["Use Hugging Face Dataset", "Upload CSV File"])
 
57
  if input_option == "Use Hugging Face Dataset":
58
  dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="Einstellung/demo-salaries")
59
  if st.button("Load Dataset"):
@@ -61,16 +63,25 @@ if input_option == "Use Hugging Face Dataset":
61
  with st.spinner("Loading dataset..."):
62
  dataset = load_dataset(dataset_name, split="train")
63
  st.session_state.df = pd.DataFrame(dataset)
 
64
  st.success(f"Dataset '{dataset_name}' loaded successfully!")
65
- st.dataframe(st.session_state.df.head())
66
  except Exception as e:
67
  st.error(f"Error: {e}")
 
68
  elif input_option == "Upload CSV File":
69
  uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
70
  if uploaded_file:
71
- st.session_state.df = pd.read_csv(uploaded_file)
72
- st.success("File uploaded successfully!")
73
- st.dataframe(st.session_state.df.head())
 
 
 
 
 
 
 
 
74
 
75
  # SQL-RAG Analysis
76
  if st.session_state.df is not None:
@@ -100,6 +111,7 @@ if st.session_state.df is not None:
100
  """Validate the SQL query syntax and structure before execution."""
101
  return QuerySQLCheckerTool(db=db, llm=llm).invoke({"query": sql_query})
102
 
 
103
  sql_dev = Agent(
104
  role="Senior Database Developer",
105
  goal="Extract data using optimized SQL queries.",
@@ -117,11 +129,19 @@ if st.session_state.df is not None:
117
 
118
  report_writer = Agent(
119
  role="Technical Report Writer",
120
- goal="Summarize the insights into a clear report.",
121
- backstory="An expert in summarizing data insights into readable reports.",
122
  llm=llm,
123
  )
124
 
 
 
 
 
 
 
 
 
125
  extract_data = Task(
126
  description="Extract data based on the query: {query}.",
127
  expected_output="Database results matching the query.",
@@ -130,56 +150,116 @@ if st.session_state.df is not None:
130
 
131
  analyze_data = Task(
132
  description="Analyze the extracted data for query: {query}.",
133
- expected_output="Analysis text summarizing findings.",
134
  agent=data_analyst,
135
  context=[extract_data],
136
  )
137
 
138
  write_report = Task(
139
- description="Summarize the analysis into an executive report.",
140
- expected_output="Markdown report of insights.",
141
  agent=report_writer,
142
  context=[analyze_data],
143
  )
144
 
145
- crew = Crew(
 
 
 
 
 
 
 
 
 
 
 
146
  agents=[sql_dev, data_analyst, report_writer],
147
  tasks=[extract_data, analyze_data, write_report],
148
  process=Process.sequential,
149
  verbose=True,
150
  )
151
 
152
- # UI: Tabs for Query Results and General Insights
 
 
 
 
 
 
 
153
  tab1, tab2 = st.tabs(["🔍 Query Insights + Viz", "📊 Full Data Viz"])
154
 
 
155
  with tab1:
156
  query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.")
157
  if st.button("Submit Query"):
158
  with st.spinner("Processing query..."):
159
- inputs = {"query": query}
160
- result = crew.kickoff(inputs=inputs)
161
- st.markdown("### Analysis Report:")
162
- st.markdown(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- # Query-Specific Visualization
165
- if "salary" in query.lower():
166
- fig = px.box(st.session_state.df, x="job_title", y="salary_in_usd", title="Salary Distribution by Job Title")
167
- st.plotly_chart(fig)
168
 
 
 
 
 
 
 
 
 
 
 
169
  with tab2:
170
  st.subheader("📊 Comprehensive Data Visualizations")
171
-
172
  fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency")
173
  st.plotly_chart(fig1)
174
 
175
- fig2 = px.bar(st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
176
- x="experience_level", y="salary_in_usd", title="Average Salary by Experience Level")
 
 
 
177
  st.plotly_chart(fig2)
178
 
 
 
 
 
179
  temp_dir.cleanup()
180
  else:
181
  st.info("Please load a dataset to proceed.")
182
 
 
 
183
  with st.sidebar:
184
  st.header("📚 Reference:")
185
- st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")
 
 
 
30
  # Model Selection
31
  model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
32
 
 
33
  # API Key Validation and LLM Initialization
34
  groq_api_key = os.getenv("GROQ_API_KEY")
35
  openai_api_key = os.getenv("OPENAI_API_KEY")
 
50
  # Initialize session state for data persistence
51
  if "df" not in st.session_state:
52
  st.session_state.df = None
53
+ if "show_preview" not in st.session_state:
54
+ st.session_state.show_preview = False
55
 
56
  # Dataset Input
57
  input_option = st.radio("Select Dataset Input:", ["Use Hugging Face Dataset", "Upload CSV File"])
58
+
59
  if input_option == "Use Hugging Face Dataset":
60
  dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="Einstellung/demo-salaries")
61
  if st.button("Load Dataset"):
 
63
  with st.spinner("Loading dataset..."):
64
  dataset = load_dataset(dataset_name, split="train")
65
  st.session_state.df = pd.DataFrame(dataset)
66
+ st.session_state.show_preview = True # Show preview after loading
67
  st.success(f"Dataset '{dataset_name}' loaded successfully!")
 
68
  except Exception as e:
69
  st.error(f"Error: {e}")
70
+
71
  elif input_option == "Upload CSV File":
72
  uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
73
  if uploaded_file:
74
+ try:
75
+ st.session_state.df = pd.read_csv(uploaded_file)
76
+ st.session_state.show_preview = True # Show preview after loading
77
+ st.success("File uploaded successfully!")
78
+ except Exception as e:
79
+ st.error(f"Error loading file: {e}")
80
+
81
+ # Show Dataset Preview Only After Loading
82
+ if st.session_state.df is not None and st.session_state.show_preview:
83
+ st.subheader("📂 Dataset Preview")
84
+ st.dataframe(st.session_state.df.head())
85
 
86
  # SQL-RAG Analysis
87
  if st.session_state.df is not None:
 
111
  """Validate the SQL query syntax and structure before execution."""
112
  return QuerySQLCheckerTool(db=db, llm=llm).invoke({"query": sql_query})
113
 
114
+ # Agents for SQL data extraction and analysis
115
  sql_dev = Agent(
116
  role="Senior Database Developer",
117
  goal="Extract data using optimized SQL queries.",
 
129
 
130
  report_writer = Agent(
131
  role="Technical Report Writer",
132
+ goal="Write a structured report with Introduction and Key Insights. DO NOT include any Conclusion or Summary.",
133
+ backstory="Specializes in detailed analytical reports without conclusions.",
134
  llm=llm,
135
  )
136
 
137
+ conclusion_writer = Agent(
138
+ role="Conclusion Specialist",
139
+ goal="Summarize findings into a clear and concise 3-5 line Conclusion highlighting only the most important insights.",
140
+ backstory="An expert in crafting impactful and clear conclusions.",
141
+ llm=llm,
142
+ )
143
+
144
+ # Define tasks for report and conclusion
145
  extract_data = Task(
146
  description="Extract data based on the query: {query}.",
147
  expected_output="Database results matching the query.",
 
150
 
151
  analyze_data = Task(
152
  description="Analyze the extracted data for query: {query}.",
153
+ expected_output="Key Insights and Analysis without any Introduction or Conclusion.",
154
  agent=data_analyst,
155
  context=[extract_data],
156
  )
157
 
158
  write_report = Task(
159
+ description="Write the analysis report with Introduction and Key Insights. DO NOT include any Conclusion or Summary.",
160
+ expected_output="Markdown-formatted report excluding Conclusion.",
161
  agent=report_writer,
162
  context=[analyze_data],
163
  )
164
 
165
+ write_conclusion = Task(
166
+ description="Summarize the key findings in 3-5 impactful lines, highlighting the maximum, minimum, and average salaries."
167
+ "Emphasize significant insights on salary distribution and influential compensation trends for strategic decision-making.",
168
+ expected_output="Markdown-formatted Conclusion section with key insights and statistics.",
169
+ agent=conclusion_writer,
170
+ context=[analyze_data],
171
+ )
172
+
173
+
174
+
175
+ # Separate Crews for report and conclusion
176
+ crew_report = Crew(
177
  agents=[sql_dev, data_analyst, report_writer],
178
  tasks=[extract_data, analyze_data, write_report],
179
  process=Process.sequential,
180
  verbose=True,
181
  )
182
 
183
+ crew_conclusion = Crew(
184
+ agents=[data_analyst, conclusion_writer],
185
+ tasks=[write_conclusion],
186
+ process=Process.sequential,
187
+ verbose=True,
188
+ )
189
+
190
+ # Tabs for Query Results and Visualizations
191
  tab1, tab2 = st.tabs(["🔍 Query Insights + Viz", "📊 Full Data Viz"])
192
 
193
+ # Query Insights + Visualization
194
  with tab1:
195
  query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.")
196
  if st.button("Submit Query"):
197
  with st.spinner("Processing query..."):
198
+ # Step 1: Generate the analysis report
199
+ report_inputs = {"query": query + " Provide detailed analysis but DO NOT include Conclusion."}
200
+ report_result = crew_report.kickoff(inputs=report_inputs)
201
+
202
+ # Step 2: Generate only the concise conclusion
203
+ conclusion_inputs = {"query": query + " Provide ONLY the most important insights in 3-5 concise lines."}
204
+ conclusion_result = crew_conclusion.kickoff(inputs=conclusion_inputs)
205
+
206
+ # Step 3: Display the report
207
+ #st.markdown("### Analysis Report:")
208
+ st.markdown(report_result if report_result else "⚠️ No Report Generated.")
209
+
210
+ # Step 4: Generate Visualizations
211
+ visualizations = []
212
+
213
+ fig_salary = px.box(st.session_state.df, x="job_title", y="salary_in_usd",
214
+ title="Salary Distribution by Job Title")
215
+ visualizations.append(fig_salary)
216
+
217
+ fig_experience = px.bar(
218
+ st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
219
+ x="experience_level", y="salary_in_usd",
220
+ title="Average Salary by Experience Level"
221
+ )
222
+ visualizations.append(fig_experience)
223
 
224
+ fig_employment = px.box(st.session_state.df, x="employment_type", y="salary_in_usd",
225
+ title="Salary Distribution by Employment Type")
226
+ visualizations.append(fig_employment)
 
227
 
228
+ # Step 5: Insert Visual Insights
229
+ st.markdown("### Visual Insights")
230
+ for fig in visualizations:
231
+ st.plotly_chart(fig, use_container_width=True)
232
+
233
+ # Step 6: Display Concise Conclusion
234
+ #st.markdown("#### Conclusion")
235
+ st.markdown(conclusion_result if conclusion_result else "⚠️ No Conclusion Generated.")
236
+
237
+ # Full Data Visualization Tab
238
  with tab2:
239
  st.subheader("📊 Comprehensive Data Visualizations")
240
+
241
  fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency")
242
  st.plotly_chart(fig1)
243
 
244
+ fig2 = px.bar(
245
+ st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
246
+ x="experience_level", y="salary_in_usd",
247
+ title="Average Salary by Experience Level"
248
+ )
249
  st.plotly_chart(fig2)
250
 
251
+ fig3 = px.box(st.session_state.df, x="employment_type", y="salary_in_usd",
252
+ title="Salary Distribution by Employment Type")
253
+ st.plotly_chart(fig3)
254
+
255
  temp_dir.cleanup()
256
  else:
257
  st.info("Please load a dataset to proceed.")
258
 
259
+
260
+ # Sidebar Reference
261
  with st.sidebar:
262
  st.header("📚 Reference:")
263
+ st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")
264
+
265
+