File size: 7,644 Bytes
a8c352a 9ebc9d5 a8c352a 9ebc9d5 a8c352a b450063 a8c352a 9ebc9d5 3f66889 a8c352a 9ebc9d5 415ed22 9ebc9d5 415ed22 cf8a311 a8c352a cf8a311 8681efd cf8a311 8681efd cf8a311 8681efd 3f66889 a8c352a 463a729 ab38aba a8c352a 3644b2f a8c352a 3644b2f b7ac0c0 a8c352a f1b4b0b 463a729 63efb46 ddf910b 25f1f96 9a27eaf a63767e 9a27eaf 25f1f96 9a27eaf a8c352a 9ea6fb2 a8c352a b3f7ddc a8c352a ddf910b a8c352a 28ee649 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import torch
import numpy as np
import gradio as gr
from typing import Dict
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification,BertTokenizer
# Size of the classification head: one output per label scored by the app.
num_labels = 14

# The fine-tuned weights and the tokenizer vocabulary are both loaded from the
# same Hugging Face Hub repository, so the identifier is spelled out once.
_HUB_REPO_ID = "owaiskha9654/Multi-Label-Classification-of-PubMed-Articles"

model = BertForSequenceClassification.from_pretrained(
    _HUB_REPO_ID,
    num_labels=num_labels,
)
tokenizer = BertTokenizer.from_pretrained(
    _HUB_REPO_ID,
    do_lower_case=True,
)
# Display names of the predicted categories, listed in the same order as the
# model's output logits (index 0 -> "Anatomy [A]", ..., index 13 -> "Geographicals [Z]").
_MESH_LABELS = (
    "Anatomy [A]",
    "Organisms [B]",
    "Diseases [C]",
    "Chemicals and Drugs [D]",
    "Analytical, Diagnostic and Therapeutic Techniques, and Equipment [E]",
    "Psychiatry and Psychology [F]",
    "Phenomena and Processes [G]",
    "Disciplines and Occupations [H]",
    "Anthropology, Education, Sociology, and Social Phenomena [I]",
    "Technology, Industry, and Agriculture [J]",
    "Information Science [L]",
    "Named Groups [M]",
    "Health Care [N]",
    "Geographicals [Z]",
)


def Multi_Label_Classification_of_Pubmed_Articles(model_input: str) -> Dict[str, float]:
    """Score an article against every MeSH category with the fine-tuned model.

    The text is tokenized, passed through the module-level ``model`` once, and
    each logit is squashed independently with a sigmoid (multi-label setup, so
    the scores do not sum to 1).

    Args:
        model_input: Raw article text (title and/or abstract). ``None`` or an
            empty string is scored as empty text.

    Returns:
        Mapping from category display name to its sigmoid probability, in the
        shape expected by a ``gr.Label`` output.
    """
    text = model_input or ""

    # BERT-style models can only embed positions up to
    # ``config.max_position_embeddings`` (512 for standard BERT-base configs).
    # The previous hard-coded limit of 1024 let long articles through
    # truncation and then fail inside the embedding lookup.
    max_length = min(1024, getattr(model.config, "max_position_embeddings", 1024))

    encoded = tokenizer.encode_plus(
        text,
        max_length=max_length,
        padding=True,
        truncation=True,
    )

    # A batch with a single row is enough: the earlier code stacked the same
    # encoding twice and only ever read row 0 of the result.
    input_ids = torch.tensor([encoded["input_ids"]])
    attention_mask = torch.tensor([encoded["attention_mask"]])

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask)

    probabilities = torch.sigmoid(outputs[0])[0].tolist()
    return dict(zip(_MESH_LABELS, probabilities))
# Free-text input box for the article.
# NOTE(review): the first positional argument appears to be the component's
# initial value in Gradio 3+, so this sentence is pre-filled as text rather
# than shown as a hint -- confirm whether `placeholder=` was intended.
model_input = gr.Textbox(
    "Input text here (Note: This model is trained to classify Medical Articles)",
    show_label=False,
)

# Label widget that displays the highest-scoring categories returned by the
# prediction function (at most `num_top_classes` of them).
model_output = gr.Label(
    "Multi Label MeSH(Medical Subheadings) Result",
    label="MeSH(Medical Subheadings) Labels assigned to this article",
    show_label=True,
    num_top_classes=6,
)
# Clickable sample inputs for the interface.
#
# The interface has exactly one input component, so every example is a
# one-element list holding the full text. The long texts are assembled with
# implicit string-literal concatenation (no commas between the fragments);
# commas would turn each fragment into a separate input column. Each entry is
# followed by a comma so the list really contains three separate examples.
examples = [
    [
        "A case of a patient with type 1 neurofibromatosis associated with popliteal and coronary artery aneurysms is described in which cross-sectional "
        "imaging provided diagnostic information. "
        "The aim of this study was to compare the exercise intensity and competition load during Time Trial (TT), Flat (FL), Medium Mountain (MM) and High "
        "Mountain (HM) stages based heart rate (HR) and session rating of perceived exertion (RPE).METHODS: We monitored both HR and RPE of 12 professional "
        "cyclists during two consecutive 21-day cycling races in order to analyze the exercise intensity and competition load (TRIMPHR and TRIMPRPE). "
        "RESULTS:The highest (P<0.05) mean HR was found in TT (169±2 bpm) versus those observed in FL (135±1 bpm), MM (139±3 bpm), HM (143±1 bpm)"
    ],
    [
        "The association of body mass index (BMI) with blood pressure may be stronger in Asian than non-Asian populations, however, longitudinal studies "
        "with direct comparisons between ethnicities are lacking. We compared the relationship of BMI with incident hypertension over approximately 9.5 years"
        " of follow-up in young (24-39 years) and middle-aged (45-64 years) Chinese Asians (n=5354), American Blacks (n=6076) and American Whites (n=13451). "
        "We estimated risk differences using logistic regression models and calculated adjusted incidences and incidence differences. "
        "To facilitate comparisons across ethnicities, standardized estimates were calculated using mean covariate values for age, sex, smoking, education "
        "and field center, and included the quadratic terms for BMI and age. Weighted least-squares regression models with were constructed to summarize "
        "ethnic-specific incidence differences across BMI. Wald statistics and p-values were calculated based on chi-square distributions. The association of "
        "BMI with the incidence difference for hypertension was steeper in Chinese (p<0.05) than in American populations during young and middle-adulthood. "
        "For example, at a BMI of 25 vs 21 kg/m2 the adjusted incidence differences per 1000 persons (95% CI) in young adults with a BMI of 25 vs those with "
        "a BMI of 21 was 83 (36- 130) for Chinese, 50 (26-74) for Blacks and 30 (12-48) for Whites"
    ],
    [
        "Human interferon-alpha 2 (hIFN-a2)1 are part of a heterogenous group of proteins with similar biological activities. The three principal subtypes of "
        "IFN-a2 are designated a-2a, a-2b, and a-2c. hIFN-a2b is used to treat several diseases. These include some types of cancer, such as hairy cell leukemia, "
        "melanoma, renal cell carcinoma, follicular non-Hodgkin’s lymphoma, chronic myelogenous leukemia, and AIDS-related Kaposi’s sarcoma [2–4]. They are also "
        "used to treat genital warts and some kinds of hepatitis. Since it can increase the intensity of antigen expression on certain tumors (ovarian and "
        "colorectal carcinomas),hIFN-a2b has potential for diagnostics (imaging) and therapeutics (monoclonal antibodies). Therefore, obtaining large quantities "
        "of recombinant hIFN-a2b is important for both clinical studies and mechanistic investigations."
    ],
]
# Heading shown at the top of the page.
title = "Multi Label Classification of PubMed Articles using BIO-BERT"

# Introductory paragraph rendered under the title. Split across literals only
# for readability; the resulting string is a single paragraph.
description = (
    "The traditional machine learning models give a lot of pain when we do not "
    "have sufficient labeled data for the specific task or domain we care about "
    "to train a reliable model. "
    "Transfer learning allows us to deal with these scenarios by leveraging the "
    "already existing labeled data of some related task or domain. "
    "We try to store this knowledge gained in solving the source task in the "
    "source domain and apply it to our problem of interest. "
    "In this work, I have utilized Transfer Learning utilizing BIO BERT model to "
    "fine tune on PubMed MultiLabel classification Dataset from BIOASQ Task A."
)

# HTML footer with author and project links. Single-quoted literals avoid
# having to escape the double quotes around each href.
# The model-repository link previously read "huggingface.co./..." (stray dot
# after the host name); it now points at the canonical host.
text1 = (
    '<center> Author: Owais Ahmad, AI Engineer at <b> IBM </b> <a href="https://www.linkedin.com/in/owaiskhan9654/">Visit Profile</a> <br></center>'
    '<center> Model Trained Kaggle Kernel <a href="https://www.kaggle.com/code/owaiskhan9654/multi-label-classification-of-pubmed-articles">Link</a> <br></center>'
    '<center> Kaggle Profile <a href="https://www.kaggle.com/owaiskhan9654">Link</a> <br> </center>'
    '<center> HuggingFace Model Deployed Repository <a href="https://huggingface.co/owaiskha9654/Multi-Label-Classification-of-PubMed-Articles">Link</a> <br></center>'
)
# Wire the prediction function, the input/output widgets, the sample inputs
# and the page texts into a single Gradio interface.
app = gr.Interface(
    Multi_Label_Classification_of_Pubmed_Articles,
    inputs=model_input,
    outputs=model_output,
    examples=examples,
    title=title,
    description=description,
    article=text1,
    allow_flagging='never',
    analytics_enabled=True,
)

# Start the web server. The stray "|" that followed this call was not valid
# Python and has been removed.
app.launch()