dirkgr committed on
Commit
86ea642
1 Parent(s): 6579759

Trailing space breaks tokenization

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -98,7 +98,7 @@ from hf_olmo import OLMoForCausalLM, OLMoTokenizerFast
98
 
99
  olmo = OLMoForCausalLM.from_pretrained("allenai/OLMo-7B-Twin-2T")
100
  tokenizer = OLMoTokenizerFast.from_pretrained("allenai/OLMo-7B-Twin-2T")
101
- message = ["Language modeling is "]
102
  inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
103
  # optional verifying cuda
104
  # inputs = {k: v.to('cuda') for k,v in inputs.items()}
 
98
 
99
  olmo = OLMoForCausalLM.from_pretrained("allenai/OLMo-7B-Twin-2T")
100
  tokenizer = OLMoTokenizerFast.from_pretrained("allenai/OLMo-7B-Twin-2T")
101
+ message = ["Language modeling is"]
102
  inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
103
  # optional verifying cuda
104
  # inputs = {k: v.to('cuda') for k,v in inputs.items()}