Spaces:
Runtime error
Runtime error
preloading models brute force
Browse files
app.py
CHANGED
@@ -2,48 +2,51 @@ import gradio as gr
|
|
2 |
from transformers import AutoModel, AutoTokenizer
|
3 |
from sklearn.neighbors import NearestNeighbors
|
4 |
|
5 |
-
available_models = ['
|
6 |
-
'
|
7 |
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
for MODEL in available_models:
|
12 |
-
models[MODEL] = AutoModel.from_pretrained(MODEL)
|
13 |
-
tokenizers[MODEL] = AutoTokenizer.from_pretrained(MODEL)
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
algorithm='auto',
|
26 |
-
n_jobs=3)
|
27 |
-
|
28 |
-
nbrs = knn_model.fit(embedding_matrix)
|
29 |
-
|
30 |
-
distances, indices = nbrs.kneighbors(embedding_matrix)
|
31 |
-
|
32 |
-
return distances,indices,tokenizers[MODEL]
|
33 |
|
34 |
|
35 |
title = "How does a word's meaning change with time?"
|
36 |
|
37 |
def topk(word,model):
|
38 |
outs = []
|
39 |
-
distances, indices, tokenizer = topk_model(model)
|
40 |
-
|
41 |
-
index = tokenizer.encode(f'{word}')
|
42 |
-
for i in indices[index[1]]:
|
43 |
-
outs.append(tokenizer.decode(i))
|
44 |
-
print(tokenizer.decode(i))
|
45 |
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# with gr.Blocks() as demo:
|
49 |
# gr.Markdown(f" # {title}")
|
|
|
2 |
from transformers import AutoModel, AutoTokenizer
|
3 |
from sklearn.neighbors import NearestNeighbors
|
4 |
|
5 |
+
available_models = ['2019',
|
6 |
+
'2020']
|
7 |
|
8 |
+
model_2019 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
|
9 |
+
tokenizers_2019 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
|
10 |
+
embedding_matrix_2019 = model_2019.embeddings.word_embeddings.weight
|
11 |
+
embedding_matrix_2019 = embedding_matrix_2019.detach().numpy()
|
12 |
+
knn_model_2019 = NearestNeighbors(n_neighbors=500,
|
13 |
+
metric='cosine',
|
14 |
+
algorithm='auto',
|
15 |
+
n_jobs=3)
|
16 |
+
nbrs_2019 = knn_model_2019.fit(embedding_matrix_2019)
|
17 |
+
distances_2019, indices_2019 = nbrs_2019.kneighbors(embedding_matrix_2019)
|
18 |
|
|
|
|
|
|
|
19 |
|
20 |
+
model_2020 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
|
21 |
+
tokenizers_2020 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
|
22 |
+
embedding_matrix_2020 = model_2020.embeddings.word_embeddings.weight
|
23 |
+
embedding_matrix_2020 = embedding_matrix_2020.detach().numpy()
|
24 |
+
knn_model_2020 = NearestNeighbors(n_neighbors=500,
|
25 |
+
metric='cosine',
|
26 |
+
algorithm='auto',
|
27 |
+
n_jobs=3)
|
28 |
+
nbrs_2020 = knn_model_2020.fit(embedding_matrix_2020)
|
29 |
+
distances_2020, indices_2020 = nbrs_2020.kneighbors(embedding_matrix_2020)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
|
32 |
# Page heading for the (currently commented-out) gradio Blocks UI below.
title = "How does a word's meaning change with time?"
|
33 |
|
34 |
def topk(word, model):
    """Return the (up to 500) nearest vocabulary neighbours of *word*.

    Parameters
    ----------
    word : str
        Query word; tokenized with the selected year's tokenizer.
    model : str
        Which time slice to query: '2019' or '2020'.

    Returns
    -------
    list[str]
        Decoded neighbour tokens, nearest first.  Empty when *model* is not
        a known year (the previous code implicitly returned None there).
    """
    # Map each supported year onto its (tokenizer, kNN index table) pair,
    # replacing the two copy-pasted if-branches of the original.
    resources = {
        '2019': (tokenizers_2019, indices_2019),
        '2020': (tokenizers_2020, indices_2020),
    }
    outs = []
    if model not in resources:
        return outs
    tokenizer, indices = resources[model]
    # encode() produces [BOS, first-subword, ..., EOS]; index[1] is the first
    # sub-token of the word.  NOTE(review): multi-piece words only use their
    # first piece — TODO confirm this is intended.
    index = tokenizer.encode(f'{word}')
    for neighbour_id in indices[index[1]]:
        decoded = tokenizer.decode(neighbour_id)  # decode once, not twice
        outs.append(decoded)
        print(decoded)
    return outs
|
50 |
|
51 |
# with gr.Blocks() as demo:
|
52 |
# gr.Markdown(f" # {title}")
|