|
This repository is adapted from https://huggingface.co./chenxran/bart-smiles/tree/main.

The snippet below loads the BARTSmiles tokenizer and model from a local checkpoint and extracts features for a SMILES string.

```python
from transformers import AutoTokenizer, AutoModel, pipeline

smiles = "CCC(=O)"

# Load the tokenizer and model from the local BARTSmiles checkpoint.
tokenizer = AutoTokenizer.from_pretrained("./BARTSmiles/", add_prefix_space=True)
model = AutoModel.from_pretrained("./BARTSmiles")
model.eval()

# Tokenize the SMILES string.
inputs = tokenizer(smiles, return_tensors="pt", return_token_type_ids=False, add_special_tokens=True)

# Use a pipeline as a high-level helper to extract per-token features.
extractor = pipeline("feature-extraction", model=model, tokenizer=tokenizer)
result = extractor(smiles, return_tensors=True, tokenize_kwargs={"return_token_type_ids": False})
```
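
If a single vector per molecule is needed, the per-token hidden states can be pooled. The sketch below continues the snippet above and uses mean pooling purely as an illustrative choice; it is an assumption for demonstration, not a pooling strategy prescribed by the BARTSmiles repository or paper.

```python
import torch

# Continuing from the snippet above (reuses `model` and `inputs`).
# Mean pooling over tokens is an illustrative choice, not the paper's method.
with torch.no_grad():
    outputs = model(**inputs)                        # Seq2SeqModelOutput from the BART model
token_states = outputs.last_hidden_state             # shape: (1, seq_len, hidden_size)
mol_embedding = token_states.mean(dim=1).squeeze(0)  # shape: (hidden_size,)
print(mol_embedding.shape)
```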
|
|
|
## Citation |
|
|
|
```bibtex
@article{chilingaryan2022bartsmiles,
  title={BARTSmiles: Generative Masked Language Models for Molecular Representations},
  author={Chilingaryan, Gayane and Tamoyan, Hovhannes and Tevosyan, Ani and Babayan, Nelly and Khondkaryan, Lusine and Hambardzumyan, Karen and Navoyan, Zaven and Khachatrian, Hrant and Aghajanyan, Armen},
  journal={arXiv preprint arXiv:2211.16349},
  year={2022}
}
```
|
|