Ben Nguyen committed • 7ffa8af
Parent(s): c94e3bb

Inference endpoint

- README.md (+49, -37)
- handler.py (+27, -0)
- requirements.txt (+1, -0)
README.md CHANGED

@@ -100,7 +100,7 @@ language:
 license: cc-by-nc-sa-4.0
 ---

-This is a [COMET](https://github.com/Unbabel/COMET) quality estimation model: It receives a source sentence and the respective translation and returns a score that reflects the quality of the translation.
+This is a [COMET](https://github.com/Unbabel/COMET) quality estimation model by Unbabel: It receives a source sentence and the respective translation and returns a score that reflects the quality of the translation.

 # Paper

@@ -110,49 +110,61 @@ This is a [COMET](https://github.com/Unbabel/COMET) quality estimation model: It

 cc-by-nc-sa-4.0

-# Usage
-
-Using this model requires unbabel-comet to be installed:
-
-```bash
-pip install --upgrade pip  # ensures that pip is current
-pip install unbabel-comet
-```
-
-Then you can use it through the comet CLI:
-
-```bash
-comet-score -s {source-input}.txt -t {translation-output}.txt --model Unbabel/wmt22-cometkiwi-da
-```
-
-Or using Python:
+# Usage for Inference Endpoint

 ```python
-# (previous Python usage example; its body is not preserved in this view)
+import json
+import requests
+
+API_URL = ""
+API_TOKEN = "MY_API_KEY"
+headers = {
+    "Authorization": f"Bearer {API_TOKEN}",
+    "Content-Type": "application/json",
+}
+
+def query(url, headers, payload):
+    data = json.dumps(payload)
+    response = requests.request("POST", url, headers=headers, data=data)
+    return json.loads(response.content.decode("utf-8"))
+
+payload = {
+    "inputs": {
+        "batch_size": 8,
+        "workers": None,
+        "data": [
+            {
+                "src": "You'll be picking fruit and generally helping us do all the usual farm work",
+                "mt": "당신은 과일을 따기도 하고 대체로 우리가 하는 일상적인 농장 일을 돕게 될 겁니다",
+            },{
+                "src": "You'll be picking fruit and generally helping us do all the usual farm work",
+                "mt": "당신은 과일을 따기도 하고 대체로 우리가 하는 일상적인 농장 일을 돕게 될 겁니다",
+            },{
+                "src": "You'll be picking fruit and generally helping us do all the usual farm work",
+                "mt": "당신은 과일을 따기도 하고 대체로 우리가 하는 일상적인 농장 일을 돕게 될 겁니다",
+            },{
+                "src": "You'll be picking fruit and generally helping us do all the usual farm work",
+                "mt": "당신은 과일을 따기도 하고 대체로 우리가 하는 일상적인 농장 일을 돕게 될 겁니다",
+            },{
+                "src": "You'll be picking fruit and generally helping us do all the usual farm work",
+                "mt": "당신은 과일을 따기도 하고 대체로 우리가 하는 일상적인 농장 일을 돕게 될 겁니다",
+            },{
+                "src": "You'll be picking fruit and generally helping us do all the usual farm work",
+                "mt": "당신은 과일을 따기도 하고 대체로 우리가 하는 일상적인 농장 일을 돕게 될 겁니다",
+            },{
+                "src": "You'll be picking fruit and generally helping us do all the usual farm work",
+                "mt": "당신은 과일을 따기도 하고 대체로 우리가 하는 일상적인 농장 일을 돕게 될 겁니다",
+            },
+        ]
     }
+}
+
+scores = query(API_URL, headers, payload)
 ```

 # Intended uses

+Unbabel's model is intended to be used for **reference-free MT evaluation**.

 Given a source text and its translation, outputs a single score between 0 and 1 where 1 represents a perfect translation.

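Editor's note: the `query` call above returns whatever the endpoint's handler sends back; with the `handler.py` added below, that is the list stored in `model_output["scores"]`, one float per src/mt pair. As a minimal sketch (not part of the commit, and assuming the response arrives as that plain list in the same order as `"data"`), the scores could be paired back with the inputs like this:

```python
# Hypothetical follow-up to the README example above (not part of the commit).
# Assumes `payload` and `scores` exist as defined there and that the returned
# scores preserve the order of the "data" list.
for item, score in zip(payload["inputs"]["data"], scores):
    print(f"{score:.3f}\t{item['src']}")
```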
handler.py ADDED

@@ -0,0 +1,27 @@
+from typing import Dict, List, Any
+from comet import load_from_checkpoint
+
+
+class EndpointHandler():
+    def __init__(self, path=""):
+        self.model = load_from_checkpoint("checkpoints/model.ckpt")
+
+    def __call__(self, data: Dict[str, Any]) -> List[Any]:
+        """
+        data args:
+            inputs (:obj:`dict`): holds "batch_size", "workers", and "data",
+                where "data" is a list of {"src": ..., "mt": ...} dicts.
+        Return:
+            A :obj:`list` of segment-level scores; will be serialized and returned.
+        """
+        inputs = data.pop("inputs")
+
+        batch_size = inputs.pop("batch_size")
+        workers = inputs.pop("workers")
+
+        data = inputs.pop("data")
+
+        # Run COMET prediction on CPU; the output is dict-like with a "scores" key.
+        model_output = self.model.predict(data, batch_size=batch_size, num_workers=workers, gpus=0)
+        scores = model_output["scores"]
+
+        return scores
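Editor's note: the handler can be exercised locally before deploying. The sketch below is not part of the commit; it assumes unbabel-comet is installed and that the checkpoint sits at `checkpoints/model.ckpt`, exactly as the constructor above expects, and it reuses the payload shape from the README example (the src/mt pair here is illustrative only).

```python
# Hypothetical local smoke test for EndpointHandler (not part of the commit).
# Requires unbabel-comet and a local checkpoints/model.ckpt.
from handler import EndpointHandler

handler = EndpointHandler()

payload = {
    "inputs": {
        "batch_size": 2,
        "workers": None,
        "data": [
            {"src": "This is a test.", "mt": "이것은 테스트입니다."},
        ],
    }
}

scores = handler(payload)  # __call__ runs model.predict on CPU
print(scores)              # a list with one quality score per src/mt pair
```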
requirements.txt ADDED

@@ -0,0 +1 @@
+git+https://github.com/Unbabel/COMET.git