Spaces:
Sleeping
Sleeping
1-ARIjitS
committed on
Commit
·
d8b73be
1
Parent(s):
772bbc6
changed to include stats
Browse files- app.py +2 -1
- llm_res.py +51 -21
app.py
CHANGED
@@ -88,8 +88,9 @@ with st.container():
|
|
88 |
status.json(json_of_clinical_trials, expanded=False)
|
89 |
# 7. Use an LLM to get a summary of the clinical trials, in plain text format.
|
90 |
status.write("Getting a summary of the clinical trials...")
|
91 |
-
response = get_short_summary_out_of_json_files(json_of_clinical_trials)
|
92 |
print(f'Response from LLM summarization: {response}')
|
|
|
93 |
status.write(f'Response from LLM summarization: {response}')
|
94 |
# 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
|
95 |
status.write("Getting summary statistics of the clinical trials...")
|
|
|
88 |
status.json(json_of_clinical_trials, expanded=False)
|
89 |
# 7. Use an LLM to get a summary of the clinical trials, in plain text format.
|
90 |
status.write("Getting a summary of the clinical trials...")
|
91 |
+
response, stats_dict = get_short_summary_out_of_json_files(json_of_clinical_trials)
|
92 |
print(f'Response from LLM summarization: {response}')
|
93 |
+
print(f'basic_stats_dict:{stats_dict}')
|
94 |
status.write(f'Response from LLM summarization: {response}')
|
95 |
# 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
|
96 |
status.write("Getting summary statistics of the clinical trials...")
|
llm_res.py
CHANGED
@@ -22,6 +22,8 @@ from langchain_core.pydantic_v1 import BaseModel, Field
|
|
22 |
from langchain_openai import ChatOpenAI
|
23 |
from langchain.chains.llm import LLMChain
|
24 |
from langchain_core.prompts import PromptTemplate
|
|
|
|
|
25 |
|
26 |
load_dotenv()
|
27 |
|
@@ -267,6 +269,26 @@ def get_short_summary_out_of_json_files(data_json):
|
|
267 |
|
268 |
return result
|
269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
def tagging_insights_from_json(data_json):
|
271 |
processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
|
272 |
|
@@ -286,25 +308,25 @@ def tagging_insights_from_json(data_json):
|
|
286 |
# description: str = Field(
|
287 |
# description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
|
288 |
# )
|
289 |
-
project_title: list = Field(
|
290 |
-
|
291 |
-
)
|
292 |
-
status: list = Field(
|
293 |
-
description="Extract the status of all the clinical trials"
|
294 |
-
)
|
295 |
-
# keywords: list = Field(
|
296 |
-
# description="Extract the most relevant keywords regrouping all the clinical trials"
|
297 |
# )
|
298 |
-
|
299 |
-
|
|
|
|
|
|
|
300 |
)
|
|
|
|
|
|
|
301 |
primary_outcomes: list = Field(
|
302 |
-
description="get the
|
|
|
|
|
|
|
|
|
303 |
)
|
304 |
-
# secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
|
305 |
-
# eligibility: list = Field(
|
306 |
-
# description="get the eligibilityCriteria grouping all the clinical trials"
|
307 |
-
# )
|
308 |
healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
|
309 |
minimum_age: list = Field(
|
310 |
description="get the minimum age from each experiment"
|
@@ -316,12 +338,12 @@ def tagging_insights_from_json(data_json):
|
|
316 |
|
317 |
def get_dict(self):
|
318 |
return {
|
319 |
-
"project_title": self.project_title,
|
320 |
-
"status": self.status,
|
321 |
-
|
322 |
-
"interventions": self.interventions,
|
323 |
"primary_outcomes": self.primary_outcomes,
|
324 |
-
|
325 |
# "eligibility": self.eligibility,
|
326 |
"healthy_volunteers": self.healthy_volunteers,
|
327 |
"minimum_age": self.minimum_age,
|
@@ -342,8 +364,16 @@ def tagging_insights_from_json(data_json):
|
|
342 |
|
343 |
res= tagging_chain.invoke({"input": processed_json})
|
344 |
result_dict= res.get_dict()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
print(f"Result_tagging: {result_dict}")
|
346 |
-
return result_dict
|
347 |
|
348 |
|
349 |
# clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])
|
|
|
22 |
from langchain_openai import ChatOpenAI
|
23 |
from langchain.chains.llm import LLMChain
|
24 |
from langchain_core.prompts import PromptTemplate
|
25 |
+
from collections import Counter
|
26 |
+
import statistics
|
27 |
|
28 |
load_dotenv()
|
29 |
|
|
|
269 |
|
270 |
return result
|
271 |
|
272 |
+
def _leading_int(value):
    """Return the leading integer of an age-like value, or None.

    Plain integers are returned unchanged. Strings are split on whitespace
    and the first token is converted with ``int``. Anything that cannot be
    interpreted this way (``None``, empty or whitespace-only strings,
    non-numeric text such as "N/A", other types) yields ``None`` so the
    caller can skip it instead of crashing.
    """
    if isinstance(value, bool):
        # bool is a subclass of int; a True/False flag is not an age.
        return None
    if isinstance(value, int):
        return value
    if not isinstance(value, str):
        return None
    tokens = value.split()
    if not tokens:
        return None
    try:
        return int(tokens[0])
    except ValueError:
        return None


def analyze_data(data):
    """Compute summary statistics from a dict of per-trial value lists.

    Args:
        data: Mapping read under the keys ``'minimum_age'``,
            ``'maximum_age'``, ``'gender'`` and ``'keywords'``. A missing
            key (or a ``None`` value) is treated as an empty list.

    Returns:
        A 4-tuple ``(avg_min_age, avg_max_age, most_common_gender,
        common_keywords)``:

        * ``avg_min_age`` / ``avg_max_age`` -- mean of the parseable ages,
          or ``None`` when no age could be parsed.
        * ``most_common_gender`` -- the most frequent entry of
          ``data['gender']``, or ``None`` when that list is empty.
        * ``common_keywords`` -- every distinct keyword, ordered from most
          to least frequent.
    """
    # Only the leading number of each age entry is used.
    # NOTE(review): the trailing unit token is ignored, so entries expressed
    # in different units would be averaged as-is -- confirm upstream format.
    min_ages = [
        parsed
        for parsed in map(_leading_int, data.get('minimum_age') or [])
        if parsed is not None
    ]
    max_ages = [
        parsed
        for parsed in map(_leading_int, data.get('maximum_age') or [])
        if parsed is not None
    ]

    avg_min_age = statistics.mean(min_ages) if min_ages else None
    avg_max_age = statistics.mean(max_ages) if max_ages else None

    # most_common(1) is an empty list for an empty Counter, so guard the
    # indexing instead of raising IndexError.
    top_gender = Counter(data.get('gender') or []).most_common(1)
    most_common_gender = top_gender[0][0] if top_gender else None

    # Entries may be lists of keywords or bare keyword strings; a bare
    # string must be kept whole rather than iterated character by character.
    keywords = []
    for entry in data.get('keywords') or []:
        if isinstance(entry, str):
            keywords.append(entry)
        elif entry:
            keywords.extend(entry)
    common_keywords = [word for word, _ in Counter(keywords).most_common()]

    return avg_min_age, avg_max_age, most_common_gender, common_keywords
|
291 |
+
|
292 |
def tagging_insights_from_json(data_json):
|
293 |
processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
|
294 |
|
|
|
308 |
# description: str = Field(
|
309 |
# description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
|
310 |
# )
|
311 |
+
# project_title: list = Field(
|
312 |
+
# description="Extract the project titles of all the clinical trials"
|
|
|
|
|
|
|
|
|
|
|
|
|
313 |
# )
|
314 |
+
# status: list = Field(
|
315 |
+
# description="Extract the status of all the clinical trials"
|
316 |
+
# )
|
317 |
+
keywords: list = Field(
|
318 |
+
description="Extract the most relevant keywords for each clinical trials"
|
319 |
)
|
320 |
+
# interventions: list = Field(
|
321 |
+
# description="describe the interventions for each clinical trial using title, name and description"
|
322 |
+
# )
|
323 |
primary_outcomes: list = Field(
|
324 |
+
description="get the timeframe of each clinical trial"
|
325 |
+
)
|
326 |
+
secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
|
327 |
+
eligibility: list = Field(
|
328 |
+
description="get the timeframe of each clinical trial"
|
329 |
)
|
|
|
|
|
|
|
|
|
330 |
healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
|
331 |
minimum_age: list = Field(
|
332 |
description="get the minimum age from each experiment"
|
|
|
338 |
|
339 |
def get_dict(self):
|
340 |
return {
|
341 |
+
# "project_title": self.project_title,
|
342 |
+
# "status": self.status,
|
343 |
+
"keywords": self.keywords,
|
344 |
+
# "interventions": self.interventions,
|
345 |
"primary_outcomes": self.primary_outcomes,
|
346 |
+
"secondary_outcomes": self.secondary_outcomes,
|
347 |
# "eligibility": self.eligibility,
|
348 |
"healthy_volunteers": self.healthy_volunteers,
|
349 |
"minimum_age": self.minimum_age,
|
|
|
364 |
|
365 |
res= tagging_chain.invoke({"input": processed_json})
|
366 |
result_dict= res.get_dict()
|
367 |
+
|
368 |
+
avg_min_age, avg_max_age, most_common_gender, common_keywords= analyze_data(result_dict)
|
369 |
+
|
370 |
+
stats_dict= {'Average Minimum age': avg_min_age,
|
371 |
+
'Average Maximum age': avg_max_age,
|
372 |
+
'Most common gender undergoing the trials': most_common_gender,
|
373 |
+
'common keywords found in the trials': common_keywords}
|
374 |
+
|
375 |
print(f"Result_tagging: {result_dict}")
|
376 |
+
return result_dict, stats_dict
|
377 |
|
378 |
|
379 |
# clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])
|