Added wembedding_service folder.
Browse filesAdded wembedding_server folder running in vm.
wembedding_service/compute_wembeddings.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
#
|
3 |
+
# Copyright 2020 Institute of Formal and Applied Linguistics, Faculty of
|
4 |
+
# Mathematics and Physics, Charles University, Czech Republic.
|
5 |
+
#
|
6 |
+
# This Source Code Form is subject to the terms of the Mozilla Public
|
7 |
+
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
8 |
+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
9 |
+
|
10 |
+
import sys
|
11 |
+
import zipfile
|
12 |
+
|
13 |
+
import numpy as np
|
14 |
+
|
15 |
+
import wembeddings.wembeddings as wembeddings
|
16 |
+
|
17 |
+
if __name__ == "__main__":
    # Command-line tool: read tokenized sentences from a CoNLL/CoNLL-U file,
    # compute their word embeddings (locally or through a server) and store
    # them in an uncompressed NPZ archive, one `arr_<index>` entry per sentence.
    import argparse

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("input_path", type=str, help="Input file")
    parser.add_argument("output_npz", type=str, help="Output NPZ file")
    parser.add_argument("--batch_size", default=64, type=int, help="Batch size")
    parser.add_argument("--dtype", default="float16", type=str, help="Dtype to save as")
    # `choices` validates the format even when Python runs with -O, where a
    # plain `assert` would be stripped.
    parser.add_argument("--format", default="conllu", type=str, choices=["conll", "conllu"], help="Input format (conllu, conll)")
    parser.add_argument("--model", default="bert-base-multilingual-uncased-last4", type=str, help="Model name (see wembeddings.py for options)")
    parser.add_argument("--server", default=None, type=str, help="Use given server to compute the embeddings")
    parser.add_argument("--threads", default=4, type=int, help="Threads to use")
    args = parser.parse_args()

    if args.batch_size < 1:
        # A zero step would make the batching `range` below raise.
        parser.error("--batch_size must be a positive integer")

    # Resolve the dtype name to the NumPy attribute of the same name.
    args.dtype = getattr(np, args.dtype)

    # Load the input file; sentences are separated by empty lines.
    sentences = []
    with open(args.input_path, mode="r", encoding="utf-8") as input_file:
        in_sentence = False
        for line_number, line in enumerate(input_file, start=1):
            line = line.rstrip("\n")
            if not line:
                in_sentence = False
                continue

            if not in_sentence:
                sentences.append([])
                in_sentence = True

            columns = line.split("\t")
            if args.format == "conll":
                # The word form is the first column.
                sentences[-1].append(columns[0])
            elif columns[0].isdigit():
                # CoNLL-U: only lines whose first column is a plain integer
                # contribute a word; the form is the second column.
                if len(columns) != 10:
                    raise ValueError("{}:{}: expected 10 tab-separated columns, found {}".format(
                        args.input_path, line_number, len(columns)))
                sentences[-1].append(columns[1])
    print("Loaded {} sentences and {} words.".format(len(sentences), sum(map(len, sentences))), file=sys.stderr, flush=True)

    # Initialize suitable computational class. A separate name is used so the
    # imported `wembeddings` module is not shadowed by the instance.
    if args.server is not None:
        embedder = wembeddings.WEmbeddings.ClientNetwork(args.server)
    else:
        embedder = wembeddings.WEmbeddings(threads=args.threads)

    # Compute word embeddings batch by batch and write each sentence as its
    # own .npy stream inside the archive.
    with zipfile.ZipFile(args.output_npz, mode="w", compression=zipfile.ZIP_STORED) as output_npz:
        for batch_start in range(0, len(sentences), args.batch_size):
            batch = sentences[batch_start:batch_start + args.batch_size]
            batch_embeddings = embedder.compute_embeddings(args.model, batch)
            for index, sentence_embeddings in enumerate(batch_embeddings, start=batch_start):
                with output_npz.open("arr_{}".format(index), mode="w") as embeddings_file:
                    np.save(embeddings_file, sentence_embeddings.astype(args.dtype))
                if (index + 1) % 100 == 0:
                    print("Processed {}/{} sentences.".format(index + 1, len(sentences)), file=sys.stderr, flush=True)
    print("Done, all embeddings saved.", file=sys.stderr, flush=True)
|
wembedding_service/start_wembeddings_server.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# coding=utf-8
|
3 |
+
#
|
4 |
+
# Copyright 2020 Institute of Formal and Applied Linguistics, Faculty of
|
5 |
+
# Mathematics and Physics, Charles University, Czech Republic.
|
6 |
+
#
|
7 |
+
# This Source Code Form is subject to the terms of the Mozilla Public
|
8 |
+
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
9 |
+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
10 |
+
|
11 |
+
"""Word embeddings server.
|
12 |
+
|
13 |
+
Example setup:
|
14 |
+
$ venv/bin/python ./start_wembeddings_server.py 8000
|
15 |
+
|
16 |
+
Example call:
|
17 |
+
$ curl --data-binary @examples/request.json localhost:8000/wembeddings | xxd
|
18 |
+
"""
|
19 |
+
|
20 |
+
import signal
|
21 |
+
import os
|
22 |
+
import sys
|
23 |
+
import threading
|
24 |
+
import time
|
25 |
+
|
26 |
+
import numpy as np
|
27 |
+
|
28 |
+
import wembeddings.wembeddings as wembeddings
|
29 |
+
import wembeddings.wembeddings_server as wembeddings_server
|
30 |
+
|
31 |
+
if __name__ == "__main__":
    # Entry point: parse options, optionally preload models and exit,
    # otherwise run the WEmbeddings HTTP server until a stop signal arrives.
    import argparse

    # Command-line options
    cli = argparse.ArgumentParser()
    cli.add_argument("port", type=int, help="Port to use")
    cli.add_argument("--dtype", default="float16", type=str, help="Dtype to serve the embeddings as")
    cli.add_argument("--logfile", default=None, type=str, help="Log path")
    cli.add_argument("--preload_models", default=[], nargs="*", type=str, help="Models to preload, or `all`")
    cli.add_argument("--preload_only", default=False, action="store_true", help="Only preload models and exit")
    cli.add_argument("--threads", default=4, type=int, help="Threads to use")
    args = cli.parse_args()
    # Resolve the dtype name to the NumPy attribute of the same name.
    args.dtype = getattr(np, args.dtype)

    # When a log path is given, everything written to stderr is appended to it.
    if args.logfile is not None:
        sys.stderr = open(args.logfile, "a", encoding="utf-8")

    def create_wembeddings():
        """Build the WEmbeddings instance configured from the command line."""
        return wembeddings.WEmbeddings(threads=args.threads, preload_models=args.preload_models)

    if args.preload_only:
        print("Preloading models only.", file=sys.stderr)
        create_wembeddings()
        sys.exit(0)

    # The server runs in its own daemon thread; the main thread only waits
    # for a stop signal.
    server = wembeddings_server.WEmbeddingsServer(args.port, args.dtype, create_wembeddings)
    server_thread = threading.Thread(target=server.serve_forever, daemon=True)
    server_thread.start()

    print("Starting WEmbeddings server on port {}.".format(args.port), file=sys.stderr)
    print("To stop it gracefully, either send SIGINT (Ctrl+C) or SIGUSR1.", file=sys.stderr, flush=True)

    def shutdown():
        """Stop the serving loop, then close the server, logging each step."""
        print("Initiating shutdown of the WEmbeddings server.", file=sys.stderr, flush=True)
        server.shutdown()
        print("Stopped handling new requests, processing all current ones.", file=sys.stderr, flush=True)
        server.server_close()
        print("Finished shutdown of the WEmbeddings server.", file=sys.stderr, flush=True)

    # Serve
    if os.name == "nt":
        # On Windows, allow interruption with Ctrl+C -- for testing only.
        def signal_handler(sig, frame):
            shutdown()
            sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        while True:
            time.sleep(1)
    else:
        # On Posix systems, block the stop signals and wait for one of them.
        stop_signals = [signal.SIGINT, signal.SIGUSR1]
        signal.pthread_sigmask(signal.SIG_BLOCK, stop_signals)
        signal.sigwait(stop_signals)
        shutdown()
|
wembedding_service/wembeddings/__pycache__/wembeddings.cpython-37.pyc
ADDED
Binary file (6.5 kB). View file
|
|
wembedding_service/wembeddings/wembeddings.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# coding=utf-8
|
3 |
+
#
|
4 |
+
# Copyright 2020 Institute of Formal and Applied Linguistics, Faculty of
|
5 |
+
# Mathematics and Physics, Charles University, Czech Republic.
|
6 |
+
#
|
7 |
+
# This Source Code Form is subject to the terms of the Mozilla Public
|
8 |
+
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
9 |
+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
10 |
+
|
11 |
+
"""Word embeddings computation class."""
|
12 |
+
|
13 |
+
import json
|
14 |
+
import sys
|
15 |
+
import time
|
16 |
+
import urllib.request
|
17 |
+
|
18 |
+
import numpy as np
|
19 |
+
|
20 |
+
|
21 |
+
class WEmbeddings:
    """Class to keep multiple constructed word embedding computation models."""

    MODELS_MAP = {
        # Key: model name. Value: transformer model name, layer start, layer end.
        "bert-base-multilingual-uncased-last4": ("bert-base-multilingual-uncased", -4, None),
        "robeczech-base-last4": ("ufal/robeczech-base", -4, None),
        "xlm-roberta-base-last4": ("xlm-roberta-base", -4, None),
        "bert-large-portuguese-cased-last4": ("neuralmind/bert-large-portuguese-cased", -4, None),
        "bert-base-portuguese-cased-last4": ("neuralmind/bert-base-portuguese-cased", -4, None),
    }

    # Upper bound on subwords in one model input (before special tokens are
    # added); longer sentences are split into several parts.
    # NOTE(review): presumably 512 minus two special tokens -- confirm.
    MAX_SUBWORDS_PER_SENTENCE = 510

    class _Model:
        """Construct a tokenizer and transformers model graph."""

        def __init__(self, transformers_model, layer_start, layer_end, loader_lock):
            """Remember the configuration; nothing is loaded until `load()`.

            Arguments:
              transformers_model: name passed to the transformers `from_pretrained` calls.
              layer_start, layer_end: slice bounds of the hidden-state layers to average.
              loader_lock: lock held while the model is being loaded.
            """
            self._model_loaded = False
            self._transformers_model_name = transformers_model
            self._layer_start = layer_start
            self._layer_end = layer_end
            self._loader_lock = loader_lock

        def load(self):
            """Load the tokenizer and the model, unless already loaded.

            The loaded flag is checked both before and after taking the
            lock, so concurrent callers load the model only once.
            """
            if self._model_loaded:
                return
            with self._loader_lock:
                import tensorflow as tf
                import transformers

                if self._model_loaded:
                    return

                self.tokenizer = transformers.AutoTokenizer.from_pretrained(self._transformers_model_name, use_fast=True)

                self._transformers_model = transformers.TFAutoModel.from_pretrained(
                    self._transformers_model_name,
                    config=transformers.AutoConfig.from_pretrained(self._transformers_model_name, output_hidden_states=True),
                    from_pt=True
                )

                def compute_embeddings(subwords, segments):
                    # Padding positions hold -1: clamp them to a valid id and
                    # mask them out via the attention mask.
                    subword_embeddings_layers = self._transformers_model(
                        (tf.maximum(subwords, 0), tf.cast(tf.not_equal(subwords, -1), tf.int32))
                    ).hidden_states
                    subword_embeddings = tf.math.reduce_mean(subword_embeddings_layers[self._layer_start:self._layer_end], axis=0)

                    # Average subwords (word pieces) word embeddings for each token
                    def average_subwords(embeddings_and_segments):
                        subword_embeddings, segments = embeddings_and_segments
                        return tf.math.segment_mean(subword_embeddings, segments)
                    # The first subword position is skipped (`segments` is one
                    # column narrower than `subwords`) and the last segment,
                    # which collects the padding positions, is dropped.
                    word_embeddings = tf.map_fn(average_subwords, (subword_embeddings[:, 1:], segments), dtype=tf.float32)[:, :-1]
                    return word_embeddings
                self.compute_embeddings = tf.function(compute_embeddings).get_concrete_function(
                    tf.TensorSpec(shape=[None, None], dtype=tf.int32), tf.TensorSpec(shape=[None, None], dtype=tf.int32)
                )

                self._model_loaded = True

    def __init__(self, max_form_len=64, threads=None, preload_models=()):
        """Create (lazily loaded) models for all entries of MODELS_MAP.

        Arguments:
          max_form_len: words are truncated to this many characters before tokenization.
          threads: if not None, TensorFlow inter-op and intra-op thread limit.
          preload_models: model names to load immediately; if it contains
            "all", every model is loaded. None is treated as empty.
        """
        import tensorflow as tf
        import threading

        # Impose the limit on the number of threads, if given
        if threads is not None:
            tf.config.threading.set_inter_op_parallelism_threads(threads)
            tf.config.threading.set_intra_op_parallelism_threads(threads)

        self._max_form_len = max_form_len

        # An immutable default is used in the signature; also accept None.
        if preload_models is None:
            preload_models = ()
        preload_all = "all" in preload_models

        # A single lock is shared by all models.
        loader_lock = threading.Lock()
        self._models = {}
        for model_name, (transformers_model, layer_start, layer_end) in self.MODELS_MAP.items():
            self._models[model_name] = self._Model(transformers_model, layer_start, layer_end, loader_lock)

            if preload_all or model_name in preload_models:
                self._models[model_name].load()

    def compute_embeddings(self, model, sentences):
        """Computes word embeddings.

        Arguments:
          model: one of the keys of self.MODELS_MAP.
          sentences: 2D Python array with sentences with tokens (strings).
        Returns:
          embeddings as a Python list with one Numpy array per sentence,
          each containing one row per word of that sentence
        Raises:
          KeyError: if `model` is not a known model name.
        """

        if model not in self._models:
            print("No such WEmbeddings model {}".format(model), file=sys.stderr, flush=True)
            # Fail explicitly and consistently, also for an empty batch.
            raise KeyError(model)

        if not sentences:
            return []

        loaded_model = self._models[model]
        loaded_model.load()
        tokenizer = loaded_model.tokenizer

        time_tokenization = time.time()

        # Tokenize all words of the batch in a single call; every word but
        # the first of a sentence is prefixed with a space.
        words = [
            (" " if i else "") + word[:self._max_form_len]
            for sentence in sentences for i, word in enumerate(sentence)
        ]
        # Do not call the tokenizer with an empty batch of words.
        all_word_subwords = tokenizer(words, add_special_tokens=False).input_ids if words else []

        # subwords[k]: subword ids of the k-th model input.
        # segments[k]: for every subword of the k-th input, the index of its
        #   word within that input.
        # parts[s]: number of words in each model input the s-th sentence
        #   was split into.
        subwords, segments, parts = [], [], []
        offset = 0  # index of the first word of the current sentence
        for sentence in sentences:
            segments.append([])
            subwords.append([])
            parts.append([0])
            for word_subwords in all_word_subwords[offset:offset + len(sentence)]:
                # Split sentences with too many subwords
                if len(subwords[-1]) + len(word_subwords) > self.MAX_SUBWORDS_PER_SENTENCE:
                    subwords[-1] = tokenizer.build_inputs_with_special_tokens(subwords[-1])
                    segments.append([])
                    subwords.append([])
                    parts[-1].append(0)
                segments[-1].extend([parts[-1][-1]] * len(word_subwords))
                subwords[-1].extend(word_subwords)
                parts[-1][-1] += 1
            offset += len(sentence)
            subwords[-1] = tokenizer.build_inputs_with_special_tokens(subwords[-1])

        max_sentence_len = max(len(sentence) for sentence in sentences)
        max_subwords = max(len(sentence) for sentence in subwords)

        time_embeddings = time.time()
        # Subword ids are padded with -1.
        np_subwords = np.full([len(subwords), max_subwords], -1, np.int32)
        for i, subword in enumerate(subwords):
            np_subwords[i, :len(subword)] = subword

        # Segments are padded with `max_sentence_len`, an index larger than
        # the index of any real word.
        np_segments = np.full([len(segments), max_subwords - 1], max_sentence_len, np.int32)
        for i, segment in enumerate(segments):
            np_segments[i, :len(segment)] = segment

        embeddings_with_parts = loaded_model.compute_embeddings(np_subwords, np_segments).numpy()

        # Concatenate splitted sentences
        embeddings = []
        current_sentence_part = 0
        for sentence_parts in parts:
            embeddings.append(np.concatenate(
                [embeddings_with_parts[current_sentence_part + i, :sentence_part] for i, sentence_part in enumerate(sentence_parts)],
                axis=0))
            current_sentence_part += len(sentence_parts)

        print("WEmbeddings in {:.1f}ms,".format(1000 * (time.time() - time_embeddings)),
              "tokenization in {:.1f}ms,".format(1000*(time_embeddings - time_tokenization)),
              "batch {},".format(len(sentences)),
              "max sentence len {},".format(max_sentence_len),
              "max subwords {}.".format(max_subwords),
              file=sys.stderr, flush=True)

        return embeddings

    class ClientNetwork:
        """Client computing the embeddings by calling a WEmbeddings server."""

        def __init__(self, url):
            """Remember the server address.

            Arguments:
              url: address inserted into "http://{}/wembeddings", i.e.
                without the scheme and without the path.
            """
            self._url = url

        def compute_embeddings(self, model, sentences):
            """Request embeddings of `sentences` computed by `model` from the server.

            The request body is ASCII-encoded JSON with the keys "model" and
            "sentences"; one Numpy array per sentence is read back from the
            response.

            Returns:
              embeddings as a Python list with one Numpy array per sentence
            """
            with urllib.request.urlopen(
                    "http://{}/wembeddings".format(self._url),
                    data=json.dumps({"model": model, "sentences": sentences}, ensure_ascii=True).encode("ascii"),
            ) as response:
                return [np.lib.format.read_array(response, allow_pickle=False) for _ in sentences]
|
wembedding_service/wembeddings/wembeddings_server.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# coding=utf-8
|
3 |
+
#
|
4 |
+
# Copyright 2020 Institute of Formal and Applied Linguistics, Faculty of
|
5 |
+
# Mathematics and Physics, Charles University, Czech Republic.
|
6 |
+
#
|
7 |
+
# This Source Code Form is subject to the terms of the Mozilla Public
|
8 |
+
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
9 |
+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
10 |
+
|
11 |
+
"""Word embeddings server class."""
|
12 |
+
|
13 |
+
import http.server
|
14 |
+
import json
|
15 |
+
import socketserver
|
16 |
+
import os
|
17 |
+
import sys
|
18 |
+
import threading
|
19 |
+
import urllib.parse
|
20 |
+
|
21 |
+
import numpy as np
|
22 |
+
|
23 |
+
class WEmbeddingsServer(socketserver.ThreadingTCPServer):
    """Threaded HTTP server computing word embeddings.

    Handles `POST /wembeddings` (JSON body with the keys "model" and
    "sentences"; the response is a sequence of serialized Numpy arrays, one
    per sentence) and `GET /status`.
    """

    class WEmbeddingsRequestHandler(http.server.BaseHTTPRequestHandler):
        """Request handler; every response closes the connection."""

        protocol_version = "HTTP/1.1"

        def respond(self, content_type, code=200):
            """Send the status line and headers of a response.

            Arguments:
              content_type: value of the Content-Type header.
              code: HTTP status code.
            """
            self.close_connection = True
            self.send_response(code)
            self.send_header("Connection", "close")
            self.send_header("Content-Type", content_type)
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()

        def respond_error(self, message, code=400):
            """Send a complete plain-text error response."""
            self.respond("text/plain", code)
            self.wfile.write(message.encode("utf-8"))

        def _parse_url(self):
            """Return the parsed request URL, or None after replying with an error."""
            try:
                # Re-decode the path, which was decoded as iso-8859-1, as UTF-8.
                self.path = self.path.encode("iso-8859-1").decode("utf-8")
                return urllib.parse.urlparse(self.path)
            except Exception:
                self.respond_error("Cannot parse request URL.")
                return None

        @staticmethod
        def _log_exception():
            """Write the traceback of the current exception to stderr."""
            import traceback
            traceback.print_exc(file=sys.stderr)
            sys.stderr.flush()

        def do_POST(self):
            """Handle POST requests; only `/wembeddings` is supported."""
            url = self._parse_url()
            if url is None:
                return

            # URL not found
            if url.path != "/wembeddings":
                return self.respond_error("No handler for the given URL '{}'".format(url.path), code=404)

            # Handle /wembeddings
            if self.headers.get("Transfer-Encoding", "identity").lower() != "identity":
                return self.respond_error("Only 'identity' Transfer-Encoding of payload is supported for now.")

            if "Content-Length" not in self.headers:
                return self.respond_error("The Content-Length of payload is required.")

            # `except Exception` (instead of a bare `except`) lets
            # SystemExit and KeyboardInterrupt propagate.
            try:
                length = int(self.headers["Content-Length"])
                data = json.loads(self.rfile.read(length))
                model, sentences = data["model"], data["sentences"]
            except Exception:
                self._log_exception()
                return self.respond_error("Malformed request.")

            try:
                # All handler threads share one WEmbeddings instance; the
                # mutex lets only one of them compute at a time.
                with self.server._wembeddings_mutex:
                    sentences_embeddings = self.server._wembeddings.compute_embeddings(model, sentences)
            except Exception:
                self._log_exception()
                return self.respond_error("An error occurred during wembeddings computation.")

            self.respond("application/octet_stream")
            for sentence_embedding in sentences_embeddings:
                np.lib.format.write_array(self.wfile, sentence_embedding.astype(self.server._dtype), allow_pickle=False)

        def do_GET(self):
            """Handle GET requests; only `/status` is supported."""
            url = self._parse_url()
            if url is None:
                return

            if url.path == "/status":
                self.respond("application/json")
                self.wfile.write(bytes("""{"status": "UP"}""", "utf-8"))
            # URL not found
            else:
                self.respond_error("No handler for the given URL '{}'".format(url.path), code=404)

    # Request threads are not daemonic.
    daemon_threads = False

    def __init__(self, port, dtype, wembeddings_lambda):
        """Create the embeddings object and start listening.

        Arguments:
          port: TCP port to listen on (on all interfaces).
          dtype: Numpy dtype the embeddings are converted to before sending.
          wembeddings_lambda: callable without arguments returning the object
            whose `compute_embeddings(model, sentences)` is used.
        """
        self._dtype = dtype

        # Create the WEmbeddings object and its mutex
        self._wembeddings = wembeddings_lambda()
        self._wembeddings_mutex = threading.Lock()

        # Initialize the server
        super().__init__(("", port), self.WEmbeddingsRequestHandler)

    def server_bind(self):
        """Bind the socket with address (and, where available, port) reuse enabled."""
        import socket
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # SO_REUSEPORT is not defined by the socket module on every platform.
        if os.name != 'nt' and hasattr(socket, "SO_REUSEPORT"):
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
        super().server_bind()

    def service_actions(self):
        """Drop finished request threads once the bookkeeping list grows large."""
        threads = getattr(self, "_threads", None)
        if isinstance(threads, list) and len(threads) >= 1024:
            # Prune in place: the attribute may be a list subclass providing
            # additional methods, which rebinding to a plain list would lose.
            threads[:] = [thread for thread in threads if thread.is_alive()]
|