Jo Kristian Bergum
committed on
Commit
·
18e6702
1
Parent(s):
509e185
Import vespa-engine/col-minilm
Browse files- README.md +77 -0
- config.json +31 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
README.md
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MS Marco Ranking with ColBERT on Vespa.ai
|
2 |
+
|
3 |
+
Model is based on [ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction over BERT](https://arxiv.org/abs/2004.12832).
|
4 |
+
This BERT model is based on [cross-encoder/ms-marco-MiniLM-L-6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-6-v2) and trained using the
|
5 |
+
original [ColBERT training routine](https://github.com/stanford-futuredata/ColBERT/).
|
6 |
+
|
7 |
+
This model has 22.3M trainable parameters and is approximately 2x faster than
|
8 |
+
[vespa-engine/colbert-medium](https://huggingface.co/vespa-engine/colbert-medium), with better or on-par MRR@10 on dev.
|
9 |
+
|
10 |
+
The model weights have been tuned by training on a randomized sample of MS Marco training triplets from
|
11 |
+
[MSMARCO-Passage-Ranking](https://github.com/microsoft/MSMARCO-Passage-Ranking).
|
12 |
+
|
13 |
+
To use this model with vespa.ai for MS Marco Passage Ranking, see
|
14 |
+
[MS Marco Ranking using Vespa.ai sample app](https://github.com/vespa-engine/sample-apps/tree/master/msmarco-ranking).
|
15 |
+
|
16 |
+
# MS Marco Passage Ranking
|
17 |
+
|
18 |
+
| MS Marco Passage Ranking Query Set | MRR@10 ColBERT on Vespa.ai |
|
19 |
+
|------------------------------------|----------------|
|
20 |
+
| Dev | 0.364 |
|
21 |
+
|
22 |
+
The MRR@10 on dev is achieved by re-ranking the top 1K passages retrieved by a dense retriever based on
|
23 |
+
[sentence-transformers/msmarco-MiniLM-L-6-v3](https://huggingface.co/sentence-transformers/msmarco-MiniLM-L-6-v3).
|
24 |
+
|
25 |
+
The official baseline BM25 ranking model achieves an MRR@10 of 0.16 on the eval and 0.167 on the dev question set.
|
26 |
+
See [MS Marco Passage Ranking Leaderboard](https://microsoft.github.io/msmarco/).
|
27 |
+
|
28 |
+
## Export ColBERT query encoder to ONNX
|
29 |
+
We represent the ColBERT query encoder in the Vespa runtime, to map the textual query representation to the tensor representation. For this
|
30 |
+
we use Vespa's support for running ONNX models. One can use the following snippet to export the model for serving.
|
31 |
+
|
32 |
+
```python
|
33 |
+
from transformers import BertModel
|
34 |
+
from transformers import BertPreTrainedModel
|
35 |
+
from transformers import BertConfig
|
36 |
+
import torch
|
37 |
+
import torch.nn as nn
|
38 |
+
|
39 |
+
class VespaColBERT(BertPreTrainedModel):
|
40 |
+
|
41 |
+
def __init__(self,config):
|
42 |
+
super().__init__(config)
|
43 |
+
self.bert = BertModel(config)
|
44 |
+
self.linear = nn.Linear(config.hidden_size, 32, bias=False)
|
45 |
+
self.init_weights()
|
46 |
+
|
47 |
+
def forward(self, input_ids, attention_mask):
|
48 |
+
Q = self.bert(input_ids,attention_mask=attention_mask)[0]
|
49 |
+
Q = self.linear(Q)
|
50 |
+
return torch.nn.functional.normalize(Q, p=2, dim=2)
|
51 |
+
|
52 |
+
colbert_query_encoder = VespaColBERT.from_pretrained("vespa-engine/col-minilm")
|
53 |
+
|
54 |
+
#Export model to ONNX for serving in Vespa
|
55 |
+
|
56 |
+
input_names = ["input_ids", "attention_mask"]
|
57 |
+
output_names = ["contextual"]
|
58 |
+
# Input: max 32 query terms
|
59 |
+
input_ids = torch.ones(1,32, dtype=torch.int64)
|
60 |
+
attention_mask = torch.ones(1,32,dtype=torch.int64)
|
61 |
+
args = (input_ids, attention_mask)
|
62 |
+
torch.onnx.export(colbert_query_encoder,
|
63 |
+
args=args,
|
64 |
+
f="query_encoder_colbert.onnx",
|
65 |
+
input_names = input_names,
|
66 |
+
output_names = output_names,
|
67 |
+
dynamic_axes = {
|
68 |
+
"input_ids": {0: "batch"},
|
69 |
+
"attention_mask": {0: "batch"},
|
70 |
+
"contextual": {0: "batch"},
|
71 |
+
},
|
72 |
+
opset_version=11)
|
73 |
+
```
|
74 |
+
|
75 |
+
# Representing the model on Vespa.ai
|
76 |
+
See [Ranking with ONNX models](https://docs.vespa.ai/documentation/onnx.html) and [MS Marco Ranking sample app](https://github.com/vespa-engine/sample-apps/tree/master/msmarco-ranking)
|
77 |
+
|
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "minilm",
|
3 |
+
"architectures": [
|
4 |
+
"ColBERT"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 384,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 1536,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 12,
|
23 |
+
"num_hidden_layers": 6,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
|
27 |
+
"transformers_version": "4.4.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e67cd33989d5633a4ffae5726dafee6d4fd3f42b9f888315746b50c64c9814a0
|
3 |
+
size 90949065
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "minilm", "special_tokens_map_file": "/Users/bergum/.cache/huggingface/transformers/3295d833faab1b0a5258c61d5d6ba3db7c2414aca8614a8503c6deb89fc00611.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "tokenizer_file": null}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|