DrishtiSharma committed on
Commit
898036d
·
verified ·
1 Parent(s): a01037b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -52
app.py CHANGED
@@ -3,8 +3,6 @@ import pandas as pd
3
  import sqlite3
4
  import os
5
  import json
6
- import tempfile
7
- from fpdf import FPDF
8
  from pathlib import Path
9
  import plotly.express as px
10
  from datetime import datetime, timezone
@@ -80,20 +78,10 @@ elif input_option == "Upload CSV File":
80
  except Exception as e:
81
  st.error(f"Error loading file: {e}")
82
 
83
- # Helper Functions for Download
84
- def save_as_txt(content, filename):
85
- with open(filename, "w") as f:
86
- f.write(content)
87
- return filename
88
-
89
- def save_as_pdf(content, filename):
90
- pdf = FPDF()
91
- pdf.add_page()
92
- pdf.set_font("Arial", size=12)
93
- for line in content.split('\n'):
94
- pdf.multi_cell(0, 10, line)
95
- pdf.output(filename)
96
- return filename
97
 
98
  # SQL-RAG Analysis
99
  if st.session_state.df is not None:
@@ -168,20 +156,21 @@ if st.session_state.df is not None:
168
  )
169
 
170
  write_report = Task(
171
- description="Write the analysis report with Introduction, Key Insights, and Analysis. DO NOT include any Conclusion or Summary.",
172
  expected_output="Markdown-formatted report excluding Conclusion.",
173
  agent=report_writer,
174
  context=[analyze_data],
175
  )
176
 
177
  write_conclusion = Task(
178
- description="Write a brief and impactful 3-5 line Conclusion summarizing only the most important insights/findings. Include the max, min, and average salary and highlight the most impactful insights.",
 
179
  expected_output="Markdown-formatted Conclusion/Summary section with key insights and statistics.",
180
  agent=conclusion_writer,
181
  context=[analyze_data],
182
  )
183
 
184
- # Crews for report and conclusion
185
  crew_report = Crew(
186
  agents=[sql_dev, data_analyst, report_writer],
187
  tasks=[extract_data, analyze_data, write_report],
@@ -204,58 +193,71 @@ if st.session_state.df is not None:
204
  query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.")
205
  if st.button("Submit Query"):
206
  with st.spinner("Processing query..."):
207
- report_result = crew_report.kickoff(inputs={"query": query + " Provide detailed analysis but DO NOT include Conclusion."})
208
- conclusion_result = crew_conclusion.kickoff(inputs={"query": query + " Provide ONLY the most important insights in 3-5 concise lines."})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- st.markdown(str(report_result) if report_result else "⚠️ No Report Generated.")
 
 
 
 
 
211
 
212
- fig_salary = px.box(st.session_state.df, x="job_title", y="salary_in_usd", title="Salary Distribution by Job Title")
213
- st.plotly_chart(fig_salary, use_container_width=True, key="fig_salary")
214
- st.caption("📊 Salary distribution across different job titles.")
215
 
216
- fig_experience = px.bar(st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
217
- x="experience_level", y="salary_in_usd", title="Average Salary by Experience Level")
218
- st.plotly_chart(fig_experience, use_container_width=True, key="fig_experience")
219
- st.caption("📊 Average salary by experience level.")
220
 
221
- fig_employment = px.box(st.session_state.df, x="employment_type", y="salary_in_usd", title="Salary Distribution by Employment Type")
222
- st.plotly_chart(fig_employment, use_container_width=True, key="fig_employment")
223
- st.caption("📊 Salary distribution across employment types.")
224
 
225
  # Full Data Visualization Tab
226
  with tab2:
227
  st.subheader("📊 Comprehensive Data Visualizations")
228
 
229
  fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency")
230
- st.plotly_chart(fig1, key="fig1")
231
- st.caption("📊 Frequency of each job title in the dataset.")
232
 
233
- fig2 = px.bar(st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
234
- x="experience_level", y="salary_in_usd", title="Average Salary by Experience Level")
235
- st.plotly_chart(fig2, key="fig2")
236
- st.caption("📊 Average salary for each experience level.")
 
 
237
 
238
- fig3 = px.box(st.session_state.df, x="employment_type", y="salary_in_usd", title="Salary Distribution by Employment Type")
239
- st.plotly_chart(fig3, key="fig3")
240
- st.caption("📊 Salary distribution across employment types.")
241
-
242
- # Restored Summary for Tab 2
243
- tab2_content = "Comprehensive Data Visualizations:\n"
244
- tab2_content += "- Job Title Frequency\n"
245
- tab2_content += "- Average Salary by Experience Level\n"
246
- tab2_content += "- Salary Distribution by Employment Type\n"
247
-
248
- tab2_txt = save_as_txt(tab2_content, "Tab2_Visualizations.txt")
249
- tab2_pdf = save_as_pdf(tab2_content, "Tab2_Visualizations.pdf")
250
- st.download_button("📥 Download Tab 2 Summary as TXT", open(tab2_txt, "rb"), file_name="Tab2_Visualizations.txt")
251
- st.download_button("📥 Download Tab 2 Summary as PDF", open(tab2_pdf, "rb"), file_name="Tab2_Visualizations.pdf")
252
 
253
  temp_dir.cleanup()
254
  else:
255
  st.info("Please load a dataset to proceed.")
256
 
 
257
  # Sidebar Reference
258
  with st.sidebar:
259
  st.header("📚 Reference:")
260
  st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")
261
 
 
 
3
  import sqlite3
4
  import os
5
  import json
 
 
6
  from pathlib import Path
7
  import plotly.express as px
8
  from datetime import datetime, timezone
 
78
  except Exception as e:
79
  st.error(f"Error loading file: {e}")
80
 
81
+ # Show Dataset Preview Only After Loading
82
+ if st.session_state.df is not None and st.session_state.show_preview:
83
+ st.subheader("📂 Dataset Preview")
84
+ st.dataframe(st.session_state.df.head())
 
 
 
 
 
 
 
 
 
 
85
 
86
  # SQL-RAG Analysis
87
  if st.session_state.df is not None:
 
156
  )
157
 
158
  write_report = Task(
159
+ description="Write the analysis report with Introduction, Key Insights, and Analysis. DO NOT include any Conclusion or Summary.",
160
  expected_output="Markdown-formatted report excluding Conclusion.",
161
  agent=report_writer,
162
  context=[analyze_data],
163
  )
164
 
165
  write_conclusion = Task(
166
+ description="Write a brief and impactful 3-5 line Conclusion summarizing only the most important insights/findings. Include the max, min, and average salary"
167
+ "and highlight the most impactful insights.",
168
  expected_output="Markdown-formatted Conclusion/Summary section with key insights and statistics.",
169
  agent=conclusion_writer,
170
  context=[analyze_data],
171
  )
172
 
173
+ # Separate Crews for report and conclusion
174
  crew_report = Crew(
175
  agents=[sql_dev, data_analyst, report_writer],
176
  tasks=[extract_data, analyze_data, write_report],
 
193
  query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.")
194
  if st.button("Submit Query"):
195
  with st.spinner("Processing query..."):
196
+ # Step 1: Generate the analysis report
197
+ report_inputs = {"query": query + " Provide detailed analysis but DO NOT include Conclusion."}
198
+ report_result = crew_report.kickoff(inputs=report_inputs)
199
+
200
+ # Step 2: Generate only the concise conclusion
201
+ conclusion_inputs = {"query": query + " Provide ONLY the most important insights in 3-5 concise lines."}
202
+ conclusion_result = crew_conclusion.kickoff(inputs=conclusion_inputs)
203
+
204
+ # Step 3: Display the report
205
+ #st.markdown("### Analysis Report:")
206
+ st.markdown(report_result if report_result else "⚠️ No Report Generated.")
207
+
208
+ # Step 4: Generate Visualizations
209
+ visualizations = []
210
+
211
+ fig_salary = px.box(st.session_state.df, x="job_title", y="salary_in_usd",
212
+ title="Salary Distribution by Job Title")
213
+ visualizations.append(fig_salary)
214
 
215
+ fig_experience = px.bar(
216
+ st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
217
+ x="experience_level", y="salary_in_usd",
218
+ title="Average Salary by Experience Level"
219
+ )
220
+ visualizations.append(fig_experience)
221
 
222
+ fig_employment = px.box(st.session_state.df, x="employment_type", y="salary_in_usd",
223
+ title="Salary Distribution by Employment Type")
224
+ visualizations.append(fig_employment)
225
 
226
+ # Step 5: Insert Visual Insights
227
+ st.markdown("#### 5. Visual Insights")
228
+ for fig in visualizations:
229
+ st.plotly_chart(fig, use_container_width=True)
230
 
231
+ # Step 6: Display Concise Conclusion
232
+ #st.markdown("#### 6. Conclusion")
233
+ st.markdown(conclusion_result if conclusion_result else "⚠️ No Conclusion Generated.")
234
 
235
  # Full Data Visualization Tab
236
  with tab2:
237
  st.subheader("📊 Comprehensive Data Visualizations")
238
 
239
  fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency")
240
+ st.plotly_chart(fig1)
 
241
 
242
+ fig2 = px.bar(
243
+ st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
244
+ x="experience_level", y="salary_in_usd",
245
+ title="Average Salary by Experience Level"
246
+ )
247
+ st.plotly_chart(fig2)
248
 
249
+ fig3 = px.box(st.session_state.df, x="employment_type", y="salary_in_usd",
250
+ title="Salary Distribution by Employment Type")
251
+ st.plotly_chart(fig3)
 
 
 
 
 
 
 
 
 
 
 
252
 
253
  temp_dir.cleanup()
254
  else:
255
  st.info("Please load a dataset to proceed.")
256
 
257
+
258
  # Sidebar Reference
259
  with st.sidebar:
260
  st.header("📚 Reference:")
261
  st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")
262
 
263
+