oliu-io committed on
Commit
6608414
verified
1 Parent(s): 1e8ac76

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -7
README.md CHANGED
@@ -26,13 +26,11 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
26
  tokenizer = AutoTokenizer.from_pretrained("metagene-ai/METAGENE-1")
27
  model = AutoModelForCausalLM.from_pretrained("metagene-ai/METAGENE-1", torch_dtype=torch.bfloat16)
28
 
29
- # Example input: Hexamita inflata 5.8S ribosomal RNA gene sequence
30
- input_sequence = (
31
- "TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTCTTGCCCCGCTGAGGGTTACACTCGCCCGTTCCCGAGTCTGTGGTTTCGCGAAGATATGACCAGGGACAGTAAGAACC"
32
- )
33
 
34
  # Tokenize the input sequence and truncate to the first 12 tokens
35
- input_tokens = tokenizer.encode(input_sequence, return_tensors="pt", add_special_tokens=False)[..., :12]
36
 
37
  # Generate output from the model with a max sequence length of 32 tokens
38
  generated_tokens = model.generate(input_tokens, max_length=32)
@@ -41,9 +39,9 @@ generated_tokens = model.generate(input_tokens, max_length=32)
41
  generated_sequence = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
42
  generated_sequence = generated_sequence.replace(" ", "").replace("_", "")
43
 
44
- # Print the original input and the model's output
45
- print(f"📄 Input Sequence:\n{input_sequence}")
46
  print(f"🔬 Generated Sequence:\n{generated_sequence}")
 
47
  ```
48
 
49
  ## **Benchmark Performance**
 
26
  tokenizer = AutoTokenizer.from_pretrained("metagene-ai/METAGENE-1")
27
  model = AutoModelForCausalLM.from_pretrained("metagene-ai/METAGENE-1", torch_dtype=torch.bfloat16)
28
 
29
+ # Example input sequence
30
+ input_sequence = "TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTC"
 
 
31
 
32
  # Tokenize the input sequence
33
+ input_tokens = tokenizer.encode(input_sequence, return_tensors="pt", add_special_tokens=False)
34
 
35
  # Generate output from the model with a max sequence length of 32 tokens
36
  generated_tokens = model.generate(input_tokens, max_length=32)
 
39
  generated_sequence = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
40
  generated_sequence = generated_sequence.replace(" ", "").replace("_", "")
41
 
42
+ # Generated output: A Hexamita inflata 5.8S ribosomal RNA gene sequence
 
43
  print(f"🔬 Generated Sequence:\n{generated_sequence}")
44
+ # TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTCTTGCCCCGCTGAGGGTTACACTCGCCCGTTCCCGAGTCTGTGGTTTCGCGAAGATATGACCAGGGACAGTAAGAACC
45
  ```
46
 
47
  ## **Benchmark Performance**