# NOTE(review): the following banner text ("Spaces: Sleeping Sleeping") was
# scraped page residue, not code; kept here as a comment so the file parses.
import os
import re

import pandas as pd
import requests

#from llama_index.llms.replicate import Replicate
def extract_japan_cities(text):
    """Return unique Japanese city names found in *text*, sorted and joined.

    Scans *text* for occurrences of "<City> - Japan" (city = one or two
    word-character tokens) and returns the distinct city names as a single
    comma-separated string, in sorted order.  Returns '' when none match.
    """
    pattern = r'(\b\w+\s*\w*\b) - Japan'
    matched = re.findall(pattern, text)
    # Deduplicate, then sort for a stable, readable listing.
    return ', '.join(sorted(set(matched)))
def fetch_clinical_trials(cancer_name):
    """Fetch recruiting clinical trials located in Japan for a given cancer.

    Queries the ClinicalTrials.gov v2 REST API with *cancer_name* and follows
    ``nextPageToken`` pagination until every page has been consumed.

    Args:
        cancer_name: Free-text cancer name inserted into the search expression.

    Returns:
        pandas.DataFrame with one row per study and the columns NCTID, Title,
        Primary Completion Date, Cancer, Summary, "Japanes Locations" (column
        name kept as-is for backward compatibility) and Eligibility Criteria.
        Empty DataFrame when the request fails or nothing matches.
    """
    search_expr = (
        "%s SEARCH[Location](AREA[LocationCountry]Japan "
        "AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    )
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }
    data_list = []

    # Loop until the API stops returning a nextPageToken.
    while True:
        # Debug aid: show the effective URL being fetched.
        print("Fetching data from:", base_url + '?' + '&'.join(f"{k}={v}" for k, v in params.items()))
        try:
            # Timeout prevents the pagination loop from hanging forever on a
            # stalled connection (the original call had no timeout at all).
            response = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException as exc:
            print("Failed to fetch data:", exc)
            break
        if response.status_code != 200:
            print("Failed to fetch data. Status code:", response.status_code)
            break

        data = response.json()
        for study in data.get('studies', []):
            # Use .get() at every level: studies may omit optional modules
            # (descriptionModule, conditionsModule, ...), and one malformed
            # study must not abort the whole fetch with a KeyError.
            protocol = study.get('protocolSection', {})
            identification = protocol.get('identificationModule', {})
            status = protocol.get('statusModule', {})

            nctId = identification.get('nctId', 'Unknown')
            title = identification.get('briefTitle', 'no title')
            summary = protocol.get('descriptionModule', {}).get('briefSummary', 'no summary')
            conditions = ', '.join(protocol.get('conditionsModule', {}).get('conditions', ['No conditions listed']))

            # Build "City - Country" pairs, then keep only Japanese cities.
            locations_list = protocol.get('contactsLocationsModule', {}).get('locations', [])
            locations = ', '.join(
                f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}"
                for location in locations_list
            ) if locations_list else "No locations listed"
            JapanesLocations = extract_japan_cities(locations)

            primaryCompletionDate = status.get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')
            eligibilityCriteria = protocol.get('eligibilityModule', {}).get('eligibilityCriteria', 'Unknown')

            data_list.append({
                "NCTID": nctId,
                "Title": title,
                "Primary Completion Date": primaryCompletionDate,
                "Cancer": conditions,
                "Summary": summary,
                "Japanes Locations": JapanesLocations,
                "Eligibility Criteria": eligibilityCriteria
            })

        # Continue to the next page if the API provided a token.
        nextPageToken = data.get('nextPageToken')
        if nextPageToken:
            params['pageToken'] = nextPageToken
        else:
            break

    return pd.DataFrame(data_list)
def fetch_clinical_trials_jp(cancer_name):
    """Fetch recruiting Japanese clinical trials, with Japanese column names.

    Same query and pagination as ``fetch_clinical_trials`` (ClinicalTrials.gov
    v2 API, follows ``nextPageToken``), but the resulting DataFrame uses
    Japanese-labelled columns for display to Japanese users.

    Args:
        cancer_name: Free-text cancer name inserted into the search expression.

    Returns:
        pandas.DataFrame with one row per study and the columns NCTID,
        タイトル, 対象となる癌, サマリー, 場所, クライテリア.  Empty DataFrame
        when the request fails or nothing matches.
    """
    search_expr = (
        "%s SEARCH[Location](AREA[LocationCountry]Japan "
        "AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    )
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }
    data_list = []

    # Loop until the API stops returning a nextPageToken.
    while True:
        # Debug aid: show the effective URL being fetched.
        print("Fetching data from:", base_url + '?' + '&'.join(f"{k}={v}" for k, v in params.items()))
        try:
            # Timeout prevents the pagination loop from hanging forever on a
            # stalled connection (the original call had no timeout at all).
            response = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException as exc:
            print("Failed to fetch data:", exc)
            break
        if response.status_code != 200:
            print("Failed to fetch data. Status code:", response.status_code)
            break

        data = response.json()
        for study in data.get('studies', []):
            # Use .get() at every level: studies may omit optional modules,
            # and one malformed study must not abort the whole fetch with a
            # KeyError.
            protocol = study.get('protocolSection', {})
            identification = protocol.get('identificationModule', {})

            nctId = identification.get('nctId', 'Unknown')
            title = identification.get('briefTitle', 'no title')
            summary = protocol.get('descriptionModule', {}).get('briefSummary', 'no summary')
            conditions = ', '.join(protocol.get('conditionsModule', {}).get('conditions', ['No conditions listed']))

            # Build "City - Country" pairs, then keep only Japanese cities.
            locations_list = protocol.get('contactsLocationsModule', {}).get('locations', [])
            locations = ', '.join(
                f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}"
                for location in locations_list
            ) if locations_list else "No locations listed"
            JapanesLocations = extract_japan_cities(locations)

            eligibilityCriteria = protocol.get('eligibilityModule', {}).get('eligibilityCriteria', 'Unknown')

            data_list.append({
                "NCTID": nctId,
                "タイトル": title,
                "対象となる癌": conditions,
                "サマリー": summary,
                "場所": JapanesLocations,
                "クライテリア": eligibilityCriteria
            })

        # Continue to the next page if the API provided a token.
        nextPageToken = data.get('nextPageToken')
        if nextPageToken:
            params['pageToken'] = nextPageToken
        else:
            break

    return pd.DataFrame(data_list)