Spaces:
Running
Running
Working prototype
Browse files
- README.md +4 -4
- encoder.py +9 -1
- index.html +20 -6
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: static
|
7 |
pinned: false
|
8 |
---
|
|
|
1 |
---
|
2 |
+
title: Pyodide GPT-2 Tokenizer
|
3 |
+
emoji: π
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: green
|
6 |
sdk: static
|
7 |
pinned: false
|
8 |
---
|
encoder.py
CHANGED
@@ -114,4 +114,12 @@ def get_encoder(model_name, models_dir):
|
|
114 |
return Encoder(
|
115 |
encoder=encoder,
|
116 |
bpe_merges=bpe_merges,
|
117 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
return Encoder(
|
115 |
encoder=encoder,
|
116 |
bpe_merges=bpe_merges,
|
117 |
+
)
|
118 |
+
|
119 |
+
def get_encoder_from_strings(vocab, bpe_data):
|
120 |
+
encoder = json.loads(vocab)
|
121 |
+
bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]]
|
122 |
+
return Encoder(
|
123 |
+
encoder=encoder,
|
124 |
+
bpe_merges=bpe_merges,
|
125 |
+
)
|
index.html
CHANGED
@@ -7,15 +7,29 @@
|
|
7 |
Pyodide test page <br>
|
8 |
Open your browser console to see Pyodide output
|
9 |
<script type="text/javascript">
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
const pyodide = await loadPyodide({
|
12 |
indexURL : "https://cdn.jsdelivr.net/pyodide/v0.19.1/full/"
|
13 |
});
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
})();
|
20 |
</script>
|
21 |
</body>
|
|
|
7 |
Pyodide test page <br>
|
8 |
Open your browser console to see Pyodide output
|
9 |
<script type="text/javascript">
|
10 |
+
const URL_VOCAB = "https://huggingface.co/gpt2/resolve/main/vocab.json";
|
11 |
+
const URL_MERGES = "https://huggingface.co/gpt2/resolve/main/merges.txt";
|
12 |
+
|
13 |
+
|
14 |
+
(async function main() {
|
15 |
+
const vocab = await (await fetch(URL_VOCAB)).text();
|
16 |
+
const merges = await (await fetch(URL_MERGES)).text();
|
17 |
+
|
18 |
+
const py_code = await (await fetch("./encoder.py")).text();
|
19 |
+
const c = console;
|
20 |
+
|
21 |
const pyodide = await loadPyodide({
|
22 |
indexURL : "https://cdn.jsdelivr.net/pyodide/v0.19.1/full/"
|
23 |
});
|
24 |
+
|
25 |
+
await pyodide.loadPackagesFromImports(py_code);
|
26 |
+
pyodide.runPython(py_code);
|
27 |
+
|
28 |
+
pyodide.globals.set("vocab", vocab);
|
29 |
+
pyodide.globals.set("merges", merges);
|
30 |
+
pyodide.runPython(`encoder = get_encoder_from_strings(vocab, merges)`);
|
31 |
+
const out = pyodide.runPython(`encoder.encode(${JSON.stringify("Hello my name is")})`);
|
32 |
+
c.log(Array.from(out));
|
33 |
})();
|
34 |
</script>
|
35 |
</body>
|