Update README.md
Browse files
README.md
CHANGED
@@ -26,13 +26,11 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained("metagene-ai/METAGENE-1")
|
27 |
model = AutoModelForCausalLM.from_pretrained("metagene-ai/METAGENE-1", torch_dtype=torch.bfloat16)
|
28 |
|
29 |
-
# Example input
|
30 |
-
input_sequence =
|
31 |
-
"TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTCTTGCCCCGCTGAGGGTTACACTCGCCCGTTCCCGAGTCTGTGGTTTCGCGAAGATATGACCAGGGACAGTAAGAACC"
|
32 |
-
)
|
33 |
|
34 |
# Tokenize the input sequence without adding special tokens (no truncation is applied)
|
35 |
-
input_tokens = tokenizer.encode(input_sequence, return_tensors="pt", add_special_tokens=False)
|
36 |
|
37 |
# Generate output from the model with a max sequence length of 32 tokens
|
38 |
generated_tokens = model.generate(input_tokens, max_length=32)
|
@@ -41,9 +39,9 @@ generated_tokens = model.generate(input_tokens, max_length=32)
|
|
41 |
generated_sequence = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
|
42 |
generated_sequence = generated_sequence.replace(" ", "").replace("_", "")
|
43 |
|
44 |
-
#
|
45 |
-
print(f"📄 Input Sequence:\n{input_sequence}")
|
46 |
print(f"🔬 Generated Sequence:\n{generated_sequence}")
|
|
|
47 |
```
|
48 |
|
49 |
## **Benchmark Performance**
|
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained("metagene-ai/METAGENE-1")
|
27 |
model = AutoModelForCausalLM.from_pretrained("metagene-ai/METAGENE-1", torch_dtype=torch.bfloat16)
|
28 |
|
29 |
+
# Example input sequence
|
30 |
+
input_sequence = "TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTC"
|
|
|
|
|
31 |
|
32 |
# Tokenize the input sequence without adding special tokens (no truncation is applied)
|
33 |
+
input_tokens = tokenizer.encode(input_sequence, return_tensors="pt", add_special_tokens=False)
|
34 |
|
35 |
# Generate output from the model with a max sequence length of 32 tokens
|
36 |
generated_tokens = model.generate(input_tokens, max_length=32)
|
|
|
39 |
generated_sequence = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
|
40 |
generated_sequence = generated_sequence.replace(" ", "").replace("_", "")
|
41 |
|
42 |
+
# Generated output: A Hexamita inflata 5.8S ribosomal RNA gene sequence
|
|
|
43 |
print(f"🔬 Generated Sequence:\n{generated_sequence}")
|
44 |
+
# TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTCTTGCCCCGCTGAGGGTTACACTCGCCCGTTCCCGAGTCTGTGGTTTCGCGAAGATATGACCAGGGACAGTAAGAACC
|
45 |
```
|
46 |
|
47 |
## **Benchmark Performance**
|