DrishtiSharma committed on
Commit
4b19bd8
·
verified ·
1 Parent(s): a839a9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -38
app.py CHANGED
@@ -98,6 +98,24 @@ if st.session_state.df is not None and st.session_state.show_preview:
98
  st.subheader("πŸ“‚ Dataset Preview")
99
  st.dataframe(st.session_state.df.head())
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  # SQL-RAG Analysis
102
  if st.session_state.df is not None:
103
  temp_dir = tempfile.TemporaryDirectory()
@@ -171,21 +189,20 @@ if st.session_state.df is not None:
171
  )
172
 
173
  write_report = Task(
174
- description="Write the analysis report with Introduction, Key Insights, and Analysis. DO NOT include any Conclusion or Summary.",
175
  expected_output="Markdown-formatted report excluding Conclusion.",
176
  agent=report_writer,
177
  context=[analyze_data],
178
  )
179
 
180
  write_conclusion = Task(
181
- description="Write a brief and impactful 3-5 line Conclusion summarizing only the most important insights/findings. Include the max, min, and average salary"
182
- "and highlight the most impactful insights.",
183
  expected_output="Markdown-formatted Conclusion/Summary section with key insights and statistics.",
184
  agent=conclusion_writer,
185
  context=[analyze_data],
186
  )
187
 
188
- # Separate Crews for report and conclusion
189
  crew_report = Crew(
190
  agents=[sql_dev, data_analyst, report_writer],
191
  tasks=[extract_data, analyze_data, write_report],
@@ -208,43 +225,33 @@ if st.session_state.df is not None:
208
  query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.")
209
  if st.button("Submit Query"):
210
  with st.spinner("Processing query..."):
211
- # Step 1: Generate the analysis report
212
  report_inputs = {"query": query + " Provide detailed analysis but DO NOT include Conclusion."}
213
  report_result = crew_report.kickoff(inputs=report_inputs)
214
 
215
- # Step 2: Generate only the concise conclusion
216
  conclusion_inputs = {"query": query + " Provide ONLY the most important insights in 3-5 concise lines."}
217
  conclusion_result = crew_conclusion.kickoff(inputs=conclusion_inputs)
218
 
219
- # Step 3: Display the report
220
- #st.markdown("### Analysis Report:")
221
  st.markdown(report_result if report_result else "⚠️ No Report Generated.")
222
 
223
- # Step 4: Generate Visualizations
224
- visualizations = []
 
 
 
225
 
226
- fig_salary = px.box(st.session_state.df, x="job_title", y="salary_in_usd",
227
- title="Salary Distribution by Job Title")
228
- visualizations.append(fig_salary)
229
 
230
- fig_experience = px.bar(
231
- st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
232
- x="experience_level", y="salary_in_usd",
233
- title="Average Salary by Experience Level"
234
- )
235
- visualizations.append(fig_experience)
236
 
237
- fig_employment = px.box(st.session_state.df, x="employment_type", y="salary_in_usd",
238
- title="Salary Distribution by Employment Type")
239
- visualizations.append(fig_employment)
240
 
241
- # Step 5: Insert Visual Insights
242
- st.markdown("#### 5. Visual Insights")
243
- for fig in visualizations:
244
- st.plotly_chart(fig, use_container_width=True)
245
-
246
- # Step 6: Display Concise Conclusion
247
- #st.markdown("#### 6. Conclusion")
248
  st.markdown(conclusion_result if conclusion_result else "⚠️ No Conclusion Generated.")
249
 
250
  # Full Data Visualization Tab
@@ -253,23 +260,30 @@ if st.session_state.df is not None:
253
 
254
  fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency")
255
  st.plotly_chart(fig1)
 
256
 
257
- fig2 = px.bar(
258
- st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
259
- x="experience_level", y="salary_in_usd",
260
- title="Average Salary by Experience Level"
261
- )
262
  st.plotly_chart(fig2)
 
263
 
264
- fig3 = px.box(st.session_state.df, x="employment_type", y="salary_in_usd",
265
- title="Salary Distribution by Employment Type")
266
  st.plotly_chart(fig3)
 
 
 
 
 
 
 
 
 
 
 
267
 
268
  temp_dir.cleanup()
269
  else:
270
  st.info("Please load a dataset to proceed.")
271
-
272
-
273
  # Sidebar Reference
274
  with st.sidebar:
275
  st.header("πŸ“š Reference:")
 
98
  st.subheader("πŸ“‚ Dataset Preview")
99
  st.dataframe(st.session_state.df.head())
100
 
101
+ import tempfile
102
+ from fpdf import FPDF
103
+
104
+ # Helper Functions for Download
105
+ def save_as_txt(content, filename):
106
+ with open(filename, "w") as f:
107
+ f.write(content)
108
+ return filename
109
+
110
+ def save_as_pdf(content, filename):
111
+ pdf = FPDF()
112
+ pdf.add_page()
113
+ pdf.set_font("Arial", size=12)
114
+ for line in content.split('\n'):
115
+ pdf.multi_cell(0, 10, line)
116
+ pdf.output(filename)
117
+ return filename
118
+
119
  # SQL-RAG Analysis
120
  if st.session_state.df is not None:
121
  temp_dir = tempfile.TemporaryDirectory()
 
189
  )
190
 
191
  write_report = Task(
192
+ description="Write the analysis report with Introduction, Key Insights, and Analysis. DO NOT include any Conclusion or Summary.",
193
  expected_output="Markdown-formatted report excluding Conclusion.",
194
  agent=report_writer,
195
  context=[analyze_data],
196
  )
197
 
198
  write_conclusion = Task(
199
+ description="Write a brief and impactful 3-5 line Conclusion summarizing only the most important insights/findings. Include the max, min, and average salary and highlight the most impactful insights.",
 
200
  expected_output="Markdown-formatted Conclusion/Summary section with key insights and statistics.",
201
  agent=conclusion_writer,
202
  context=[analyze_data],
203
  )
204
 
205
+ # Crews for report and conclusion
206
  crew_report = Crew(
207
  agents=[sql_dev, data_analyst, report_writer],
208
  tasks=[extract_data, analyze_data, write_report],
 
225
  query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.")
226
  if st.button("Submit Query"):
227
  with st.spinner("Processing query..."):
 
228
  report_inputs = {"query": query + " Provide detailed analysis but DO NOT include Conclusion."}
229
  report_result = crew_report.kickoff(inputs=report_inputs)
230
 
 
231
  conclusion_inputs = {"query": query + " Provide ONLY the most important insights in 3-5 concise lines."}
232
  conclusion_result = crew_conclusion.kickoff(inputs=conclusion_inputs)
233
 
 
 
234
  st.markdown(report_result if report_result else "⚠️ No Report Generated.")
235
 
236
+ if report_result:
237
+ tab1_txt = save_as_txt(report_result, "Tab1_Report.txt")
238
+ tab1_pdf = save_as_pdf(report_result, "Tab1_Report.pdf")
239
+ st.download_button("Download Tab 1 Report as TXT", open(tab1_txt, "rb"), file_name="Tab1_Report.txt")
240
+ st.download_button("Download Tab 1 Report as PDF", open(tab1_pdf, "rb"), file_name="Tab1_Report.pdf")
241
 
242
+ fig_salary = px.box(st.session_state.df, x="job_title", y="salary_in_usd", title="Salary Distribution by Job Title")
243
+ st.plotly_chart(fig_salary)
244
+ st.caption("πŸ“Š Salary distribution across different job titles.")
245
 
246
+ fig_experience = px.bar(st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
247
+ x="experience_level", y="salary_in_usd", title="Average Salary by Experience Level")
248
+ st.plotly_chart(fig_experience)
249
+ st.caption("πŸ“Š Average salary based on experience level.")
 
 
250
 
251
+ fig_employment = px.box(st.session_state.df, x="employment_type", y="salary_in_usd", title="Salary Distribution by Employment Type")
252
+ st.plotly_chart(fig_employment)
253
+ st.caption("πŸ“Š Salary distribution across employment types.")
254
 
 
 
 
 
 
 
 
255
  st.markdown(conclusion_result if conclusion_result else "⚠️ No Conclusion Generated.")
256
 
257
  # Full Data Visualization Tab
 
260
 
261
  fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency")
262
  st.plotly_chart(fig1)
263
+ st.caption("πŸ“Š Frequency of each job title in the dataset.")
264
 
265
+ fig2 = px.bar(st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
266
+ x="experience_level", y="salary_in_usd", title="Average Salary by Experience Level")
 
 
 
267
  st.plotly_chart(fig2)
268
+ st.caption("πŸ“Š Average salary for each experience level.")
269
 
270
+ fig3 = px.box(st.session_state.df, x="employment_type", y="salary_in_usd", title="Salary Distribution by Employment Type")
 
271
  st.plotly_chart(fig3)
272
+ st.caption("πŸ“Š Salary distribution for each employment type.")
273
+
274
+ tab2_content = "Comprehensive Data Visualizations:\n"
275
+ tab2_content += "- Job Title Frequency\n"
276
+ tab2_content += "- Average Salary by Experience Level\n"
277
+ tab2_content += "- Salary Distribution by Employment Type\n"
278
+
279
+ tab2_txt = save_as_txt(tab2_content, "Tab2_Visualizations.txt")
280
+ tab2_pdf = save_as_pdf(tab2_content, "Tab2_Visualizations.pdf")
281
+ st.download_button("Download Tab 2 Summary as TXT", open(tab2_txt, "rb"), file_name="Tab2_Visualizations.txt")
282
+ st.download_button("Download Tab 2 Summary as PDF", open(tab2_pdf, "rb"), file_name="Tab2_Visualizations.pdf")
283
 
284
  temp_dir.cleanup()
285
  else:
286
  st.info("Please load a dataset to proceed.")
 
 
287
  # Sidebar Reference
288
  with st.sidebar:
289
  st.header("πŸ“š Reference:")