sorryhyun committed on
Commit
36a21d1
1 Parent(s): 6a1ff6d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -4
README.md CHANGED
@@ -45,8 +45,10 @@ if __name__ == '__main__':
45
  ## Usage (HuggingFace Transformers)
46
 
47
  ```python
48
- from transformers import AutoTokenizer, AutoModel
49
  import torch
 
 
50
  device = torch.device('cuda')
51
 
52
  # Sentences we want sentence embeddings for
@@ -54,11 +56,13 @@ sentences = ['This is an example sentence', 'Each sentence is converted']
54
 
55
  # Load model from HuggingFace Hub
56
  tokenizer = AutoTokenizer.from_pretrained('{MODEL_NAME}')
 
57
  model = AutoModel.from_pretrained('{MODEL_NAME}').to(device)
58
 
59
- tokenized_data = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
60
- dataloader = DataLoader(tokenized_data, batch_size=batch_size, pin_memory=True)
61
- all_outputs = torch.zeros((len(tokenized_data), self.hidden_size)).to(device)
 
62
  start_idx = 0
63
 
64
  # I used mean-pool method for sentence representation
 
45
  ## Usage (HuggingFace Transformers)
46
 
47
  ```python
48
+ from transformers import AutoTokenizer, AutoModel, DataCollatorWithPadding
49
  import torch
50
+ from torch.utils.data import DataLoader
51
+
52
  device = torch.device('cuda')
53
 
54
  # Sentences we want sentence embeddings for
 
56
 
57
  # Load model from HuggingFace Hub
58
  tokenizer = AutoTokenizer.from_pretrained('{MODEL_NAME}')
59
+ collator = DataCollatorWithPadding(tokenizer)
60
  model = AutoModel.from_pretrained('{MODEL_NAME}').to(device)
61
 
62
+ tokenized_data = tokenizer(sentences, padding=True, truncation=True)
63
+ tokenized_data = tokenized_data.remove_columns('text')
64
+ dataloader = DataLoader(tokenized_data, batch_size=batch_size, pin_memory=True, collate_fn=collator)
65
+ all_outputs = torch.zeros((len(tokenized_data), 1024)).to(device)
66
  start_idx = 0
67
 
68
  # I used mean-pool method for sentence representation