DifeiT committed on
Commit
25fb92c
1 Parent(s): 9232c4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -5
app.py CHANGED
@@ -1,6 +1,7 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
  Updated on 09/13/2023
 
4
  """
5
 
6
  import streamlit as st
@@ -85,7 +86,7 @@ with st.form(key="my_form"):
85
  'checkpoint_path': './vocab/bioformer-cased-v1.0/bioformer-cased-v1.0-model.ckpt-2000000',
86
  'vocab_path': './vocab/bioformer-cased-v1.0/vocab.txt'}
87
 
88
- modelfile = './vocab/bioformer.h5'
89
 
90
  elif model == '2':
91
  vocabfiles = {'labelfile': './dict_new_hpo/lable.vocab',
@@ -99,6 +100,19 @@ with st.form(key="my_form"):
99
 
100
  modelfile='./vocab/bioformer_p5n5_b64_1e-5_95_hponew3.h5'
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  biotag_dic=dic_ont(ontfiles)
103
 
104
  nn_model=bioTag_Bioformer(vocabfiles)
@@ -107,6 +121,7 @@ with st.form(key="my_form"):
107
 
108
  nn_model1, biotag_dic1 = load_model(model='1')
109
  nn_model2, biotag_dic2 = load_model(model='2')
 
110
 
111
  else:
112
  @st.cache_resource
@@ -193,10 +208,9 @@ para_set={
193
  st.markdown("")
194
  st.markdown("## ⏳ Tagging results:")
195
  with st.spinner('Wait for tagging...'):
196
-
197
  tag_result1=bioTag(doc,biotag_dic1,nn_model1,onlyLongest=para_set['onlyLongest'], abbrRecog=para_set['abbrRecog'],Threshold=para_set['ML_Threshold'])
198
  tag_result2=bioTag(doc,biotag_dic2,nn_model2,onlyLongest=para_set['onlyLongest'], abbrRecog=para_set['abbrRecog'],Threshold=para_set['ML_Threshold'])
199
-
200
 
201
  st.markdown('<font style="color: rgb(128, 128, 128);">Move the mouse over the entity to display the id.</font>', unsafe_allow_html=True)
202
  # print('dic...........:',biotag_dic.keys())
@@ -208,8 +222,9 @@ entity_end=0
208
 
209
  # poid_counts = []
210
 
211
- hpoid_count1={}
212
  hpoid_count2 = {}
 
213
 
214
  tag_display = {}
215
 
@@ -256,6 +271,28 @@ if len(tag_result2) >= 0:
256
 
257
  flag = True
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  if not flag:
260
  html_results = doc
261
  else:
@@ -273,11 +310,13 @@ else:
273
  html_results += '<font style="background-color: rgb(255, 204, 0)' + ';" title="' + entity_id + '">' + doc[entity_start:entity_end] + '</font>'
274
  elif type == "2":
275
  html_results += '<font style="background-color: rgb(255, 0, 0)' + ';" title="' + entity_id + '">' + doc[entity_start:entity_end] + '</font>'
 
 
 
276
  html_results += doc[entity_end:]
277
 
278
  st.markdown('<table border="1"><tr><td>'+html_results+'</td></tr></table>', unsafe_allow_html=True)
279
 
280
-
281
  #table
282
  data_entity=[]
283
  for ele in hpoid_count1.keys():
@@ -296,6 +335,16 @@ for ele in hpoid_count2.keys():
296
  temp=[ele,term_name,hpoid_count2[ele]] #hpoid, term name, count
297
  data_entity.append(temp)
298
 
 
 
 
 
 
 
 
 
 
 
299
  st.markdown("")
300
  st.markdown("")
301
  # st.markdown("## Table output:")
 
1
  # -*- coding: utf-8 -*-
2
  """
3
  Updated on 09/13/2023
4
+
5
  """
6
 
7
  import streamlit as st
 
86
  'checkpoint_path': './vocab/bioformer-cased-v1.0/bioformer-cased-v1.0-model.ckpt-2000000',
87
  'vocab_path': './vocab/bioformer-cased-v1.0/vocab.txt'}
88
 
89
+ modelfile = './vocab/bioformer_fyeco.h5'
90
 
91
  elif model == '2':
92
  vocabfiles = {'labelfile': './dict_new_hpo/lable.vocab',
 
100
 
101
  modelfile='./vocab/bioformer_p5n5_b64_1e-5_95_hponew3.h5'
102
 
103
+ elif model == '3':
104
+ vocabfiles = {'labelfile': './dict_new_sympo/lable.vocab',
105
+ 'config_path': './vocab/bioformer-cased-v1.0/bert_config.json',
106
+ 'checkpoint_path': './vocab/bioformer-cased-v1.0/bioformer-cased-v1.0-model.ckpt-2000000',
107
+ 'vocab_path': './vocab/bioformer-cased-v1.0/vocab.txt'}
108
+
109
+ ontfiles = {'dic_file': './dict_new_sympo/noabb_lemma.dic',
110
+ 'word_hpo_file': './dict_new_sympo/word_id_map.json',
111
+ 'hpo_word_file': './dict_new_sympo/id_word_map.json'}
112
+
113
+ modelfile='./vocab/bioformer_sympo.h5'
114
+ pass
115
+
116
  biotag_dic=dic_ont(ontfiles)
117
 
118
  nn_model=bioTag_Bioformer(vocabfiles)
 
121
 
122
  nn_model1, biotag_dic1 = load_model(model='1')
123
  nn_model2, biotag_dic2 = load_model(model='2')
124
+ nn_model3, biotag_dic3 = load_model(model='3')
125
 
126
  else:
127
  @st.cache_resource
 
208
  st.markdown("")
209
  st.markdown("## ⏳ Tagging results:")
210
  with st.spinner('Wait for tagging...'):
 
211
  tag_result1=bioTag(doc,biotag_dic1,nn_model1,onlyLongest=para_set['onlyLongest'], abbrRecog=para_set['abbrRecog'],Threshold=para_set['ML_Threshold'])
212
  tag_result2=bioTag(doc,biotag_dic2,nn_model2,onlyLongest=para_set['onlyLongest'], abbrRecog=para_set['abbrRecog'],Threshold=para_set['ML_Threshold'])
213
+ tag_result3=bioTag(doc,biotag_dic3,nn_model3,onlyLongest=para_set['onlyLongest'], abbrRecog=para_set['abbrRecog'],Threshold=para_set['ML_Threshold'])
214
 
215
  st.markdown('<font style="color: rgb(128, 128, 128);">Move the mouse over the entity to display the id.</font>', unsafe_allow_html=True)
216
  # print('dic...........:',biotag_dic.keys())
 
222
 
223
  # poid_counts = []
224
 
225
+ hpoid_count1= {}
226
  hpoid_count2 = {}
227
+ hpoid_count3 = {}
228
 
229
  tag_display = {}
230
 
 
271
 
272
  flag = True
273
 
274
+ if len(tag_result3) >= 0:
275
+ entity_end = 0
276
+ for ele in tag_result3:
277
+ entity_start = int(ele[0])
278
+ #html_results += doc[entity_end:entity_start]
279
+ entity_end = int(ele[1])
280
+ entity_id = ele[2]
281
+ entity_score = ele[3]
282
+ tag_display[entity_start] = (entity_end, entity_id, "3")
283
+ text_results += ele[0] + '\t' + ele[1] + '\t' + doc[entity_start:entity_end] + '\t' + ele[2] + '\t' + format(
284
+ float(ele[3]), '.2f') + '\n'
285
+
286
+ if entity_id not in hpoid_count3.keys():
287
+ hpoid_count3[entity_id] = 1
288
+ else:
289
+ hpoid_count3[entity_id] += 1
290
+
291
+ # html_results += '<font style="background-color: rgb(255, 0, 0)' + ';" title="' + entity_id + '">' + doc[entity_start:entity_end] + '</font>'
292
+ #html_results += doc[entity_end:]
293
+
294
+ flag = True
295
+
296
  if not flag:
297
  html_results = doc
298
  else:
 
310
  html_results += '<font style="background-color: rgb(255, 204, 0)' + ';" title="' + entity_id + '">' + doc[entity_start:entity_end] + '</font>'
311
  elif type == "2":
312
  html_results += '<font style="background-color: rgb(255, 0, 0)' + ';" title="' + entity_id + '">' + doc[entity_start:entity_end] + '</font>'
313
+ elif type == "3":
314
+ html_results += '<font style="background-color: rgb(102, 255, 178)' + ';" title="' + entity_id + '">' + doc[entity_start:entity_end] + '</font>'
315
+
316
  html_results += doc[entity_end:]
317
 
318
  st.markdown('<table border="1"><tr><td>'+html_results+'</td></tr></table>', unsafe_allow_html=True)
319
 
 
320
  #table
321
  data_entity=[]
322
  for ele in hpoid_count1.keys():
 
335
  temp=[ele,term_name,hpoid_count2[ele]] #hpoid, term name, count
336
  data_entity.append(temp)
337
 
338
+ for ele in hpoid_count3.keys():
339
+ segs=ele.split(';')
340
+ term_name=''
341
+ for seg in segs:
342
+ term_name+=biotag_dic3.hpo_word[seg][0]+';'
343
+ temp=[ele,term_name,hpoid_count3[ele]] #hpoid, term name, count
344
+ data_entity.append(temp)
345
+
346
+
347
+
348
  st.markdown("")
349
  st.markdown("")
350
  # st.markdown("## Table output:")