File size: 2,114 Bytes
0888aa2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
title = "Protien Sequence Classification 🧬."
description = "Predicts the subcellular location of the protein sequence between two classes: Cytoplasm and Membrane"
article = 'Created from finetuning ESM2_150M'
model = AutoModelForSequenceClassification.from_pretrained('./Model')
tokenizer = AutoTokenizer.from_pretrained('facebook/esm2_t30_150M_UR50D')
example_list = [['MKIIILLGFLGATLSAPLIPQRLMSASNSNELLLNLNNGQLLPLQLQGPLNSWIPPFSGILQQQQQAQIPGLSQFSLSALDQFAGLLPNQIPLTGEASFAQGAQAGQVDPLQLQTPPQTQPGPSHVMPYVFSFKMPQEQGQMFQYYPVYMVLPWEQPQQTVPRSPQQTRQQQYEEQIPFYAQFGYIPQLAEPAISGGQQQLAFDPQLGTAPEIAVMSTGEEIPYLQKEAINFRHDSAGVFMPSTSPKPSTTNVFTSAVDQTITPELPEEKDKTDSLREP'],
['MSSGNYQQSEALSKPTFSEEQASALVESVFGLKVSKVRPLPSYDDQNFHVYVSKTKDGPTEYVLKISNTKASKNPDLIEVQNHIIMFLKAAGFPTASVCHTKGDNTASLVSVDSGSEIKSYLVRLLTYLPGRPIAELPVSPQLLYEIGKLAAKLDKTLQRFHHPKLSSLHRENFIWNLKNVPLLEKYLYALGQNRNREIVEHVIHLFKEEVMTKLSHFRECINHGDLNDHNILIESSKSASGNAEYQVSGILDFGDMSYGYYVFEVAITIMYMMIESKSPIQVGGHVLAGFESITPLTAVEKGALFLLVCSRFCQSLVMAAYSCQLYPENKDYLMVTAKTGWKHLQQMFDMGQKAVEEIWFETAKSYESGISM'],
['MMNNTDFLMLNNPWNKLCLVSMDFCFPLDFVSNLFWIFASKFIIVTGQIKADFKRTSWEAKAEGSLEPGRLKLQLASIVPLYSSLVTAGPASKIIILKRTSLPTVSPSNERAYLLPVSFTDLAHVFYLSYFSINAKSNSFSLDIIIALGIPHNTQAHFNH'],
['MNKHNLRLVQLASELILIEIIPKLFLSQVTTISHIKREKIPPNHRKGILCMFPWQCVVYVFSNFVWLVIHRFSNGFIQFLGEPYRLMTASGTHGRIKFMVDIPIIKNTQVLRIPVLKDPKMLSKKH']]
def predict(ProtienSequence):
input = tokenizer(ProtienSequence, return_tensors='pt')
with torch.inference_mode():
outputs = model(**input)
output = outputs.logits.argmax(axis=1)[0].numpy() == 0
print(output)
if output:
return str('Cytoplasm')
else:
return str('Membrane')
iface = gr.Interface(fn=predict,
inputs='text',
outputs=gr.Text(label='Subcellular location'),
title=title,
description=description,
article=article,
examples=example_list)
iface.launch() |