lgq12697 committed on
Commit
a213a35
·
verified ·
1 Parent(s): 3c9c971

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
3
+ import torch.nn.functional as F
4
+
5
+ placeholder = 'ACATGCTAAATTAGTTGGCAATTTTTTCTCAGGTAGCTGGGCACAATTTGGTAGTCCAGTTGAACAAAATCCATTAGCTTCTTTTAGCAAGTCCCCTGGTTTGGGCCCTGCCAGTCCCATTAATACCAACCATTTGTCTGGATTGGCTGCAATTCTTTCCCCACAAGCAACAACCTCTACCAAGATTGCACCGATTGGCAAGGACCCTGGAAGGGCTGCAAATCAGATGTTTTCTAACTCTGGATCAACACAAGGAGCAGCTTTTCAGCATTCTATATCCTTTCCTGAGCAAAATGTAAAGGCAAGTCCTAGGCCTATATCTACTTTTGGTGAATCAAGTTCTAGTGCATCAAGTATTGGAACACTGTCCGGTCCTCAATTTCTTTGGGGAAGCCCAACTCCTTACTCTGAGCATTCAAACACTTCTGCCTGGTCTTCATCTTCGGTGGGGCTTCCATTTACATCTAGTGTCCAAAGGCAGGGTTTCCCATATACTAGTAATCACAGTCCTTTTCTTGGCTCCCACTCTCATCATCATGTTGGATCTGCTCCATCTGGCCTTCCGCTTGATAGGCATTTTAGCTACTTCCCTGAGTCACCTGAAGCTTCTCTCATGAGCCCGGTTGCATTTGGGAATTTAAATCACGGTGATGGGAATTTTATGATGAACAACATTAGTGCTCGTGCATCTGTAGGAGCCGGTGTTGGTCTTTCTGGAAATACCCCTGAAATTAGTTCACCCAATTTCAGAATGATGTCTCTGCCTAGGCATGGTTCCTTGTTCCATGGAAATAGTTTGTATTCTGGACCTGGAGCAACTAACATTGAGGGATTAGCTGAACGTGGACGAAGTAGACGACCTGAAAATGGTGGGAACCAAATTGATAGTAAGAAGCTGTACCAGCTTGATCTTGACAAAATCGTCTGTGGTGAAGATACAAGGACTACTTTAATGATTAAAAACATTCCTAACAAGTAAGAATAACTAAACATCTATCCT'
6
# Base model families offered in the "Select Model" dropdown. A family name is
# combined with a task name to build the checkpoint id
# `zhangtaolab/<model>-<task>`.
model_names = [
    'plant-dnabert',
    'plant-dnagpt',
    'plant-nucleotide-transformer',
    'plant-dnagemma',
    'dnabert2',
    'nucleotide-transformer-v2-100m',
    'agront-1b',
]

# Task name -> human-readable class labels. The position of a label in its
# list is the class index it is paired with when probabilities are mapped.
task_map = {
    "promoter": ["Not promoter", "Core promoter"],
    "conservation": ["Not conserved", "Conserved"],
    "H3K27ac": ["Not H3K27ac", "H3K27ac"],
    "H3K27me3": ["Not H3K27me3", "H3K27me3"],
    "H3K4me3": ["Not H3K4me3", "H3K4me3"],
    "lncRNAs": ["Not lncRNA", "lncRNA"],
    "open_chromatin": ['Not open chromatin', 'Full open chromatin', 'Partial open chromatin'],
}

# Materialize the task names as a real list: a `dict_keys` view is neither
# JSON-serializable nor indexable, and UI components expect a plain list.
task_lists = list(task_map)
18
+
19
def inference(seq, model, task):
    """Classify a DNA sequence with the selected model/task checkpoint.

    Args:
        seq: Input sequence text. If it is empty or contains only
            whitespace, a warning is shown and the module-level
            ``placeholder`` sequence is used instead.
        model: Model family name; combined with ``task`` to form the
            checkpoint id ``zhangtaolab/<model>-<task>``.
        task: Task name; must be a key of ``task_map``.

    Returns:
        A dict mapping each class label of ``task`` to its softmax
        probability. Labels and probabilities are paired by position; if
        their counts differ, the unpaired trailing entries are dropped.

    Raises:
        KeyError: If ``task`` is not a key of ``task_map``.
    """
    # Local import: the file only binds ``torch.nn.functional`` (as ``F``),
    # so the ``torch`` name itself is not otherwise in scope.
    import torch

    # Resolve the labels first so an unknown task fails before the
    # (expensive) checkpoint load below.
    labels = task_map[task]

    # Treat whitespace-only input the same as no input at all.
    seq = seq.strip() if seq else ""
    if not seq:
        gr.Warning("No sequence provided, use the default sequence.")
        seq = placeholder

    # Load model and tokenizer.
    # NOTE: the checkpoint is loaded again on every call.
    model_name = f'zhangtaolab/{model}-{task}'
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name, ignore_mismatched_sizes=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Inference: gradients are never needed here, so skip autograd tracking.
    inputs = tokenizer(seq, return_tensors='pt', padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = classifier(**inputs)
    # Single input sequence -> take the first (only) row of the batch.
    probabilities = F.softmax(outputs.logits, dim=-1).tolist()[0]

    # Map probabilities to labels by class index.
    return dict(zip(labels, probabilities))
36
+
37
+
38
+ # Create Gradio interface
39
# NOTE(review): the nesting below is reconstructed from a flattened diff view
# in which indentation was lost -- confirm the layout against the real app.py.
with gr.Blocks() as demo:
    # Centered page heading.
    gr.HTML(
        """
        <h1 style="text-align: center;">Prediction of sequence conservation in plant with LLMs</h1>
        """
    )

    # The task dropdown is display-only (non-interactive); only the model
    # dropdown can be changed by the user.
    with gr.Row():
        drop1 = gr.Dropdown(
            label="Selected Task",
            choices=task_lists,
            value='conservation',
            interactive=False,
        )
        drop2 = gr.Dropdown(
            label="Select Model",
            choices=model_names,
            value=model_names[0],
            interactive=True,
        )

    # Free-text sequence entry; the default sequence is shown as a hint.
    seq_input = gr.Textbox(placeholder=placeholder, lines=6, label="Input Sequence")

    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    output = gr.Label(label="Predict result")

    # Argument order follows inference(seq, model, task): the model dropdown
    # (drop2) is passed before the task dropdown (drop1).
    predict_btn.click(inference, inputs=[seq_input, drop2, drop1], outputs=output)
    # "Clear" resets the textbox to "" and the result label to None.
    clear_btn.click(lambda: ("", None), inputs=[], outputs=[seq_input, output])

# Launch Gradio app
demo.launch()