littleworth
committed on
Commit
•
aa08ba0
1
Parent(s):
b7edfc6
Update README.md
Browse files
README.md
CHANGED
@@ -47,16 +47,13 @@ model_name = "littleworth/protgpt2-distilled-tiny"
|
|
47 |
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
|
48 |
model = GPT2LMHeadModel.from_pretrained(model_name)
|
49 |
|
50 |
-
# Ensure tokenizer is padding from the left
|
51 |
-
tokenizer.padding_side = "left"
|
52 |
-
|
53 |
# Initialize the pipeline
|
54 |
text_generator = TextGenerationPipeline(
|
55 |
model=model, tokenizer=tokenizer, device=0
|
56 |
) # specify device if needed
|
57 |
|
58 |
# Generate sequences
|
59 |
-
|
60 |
"<|endoftext|>",
|
61 |
max_length=100,
|
62 |
do_sample=True,
|
@@ -68,15 +65,20 @@ sequences = text_generator(
|
|
68 |
truncation=True,
|
69 |
)
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
74 |
# Remove newline characters and non-alphabetical characters
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
78 |
print(f">Seq_{i}")
|
79 |
-
print(
|
80 |
```
|
81 |
|
82 |
### Use Cases
|
|
|
47 |
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
|
48 |
model = GPT2LMHeadModel.from_pretrained(model_name)
|
49 |
|
|
|
|
|
|
|
50 |
# Initialize the pipeline
|
51 |
text_generator = TextGenerationPipeline(
|
52 |
model=model, tokenizer=tokenizer, device=0
|
53 |
) # specify device if needed
|
54 |
|
55 |
# Generate sequences
|
56 |
+
generated_sequences = text_generator(
|
57 |
"<|endoftext|>",
|
58 |
max_length=100,
|
59 |
do_sample=True,
|
|
|
65 |
truncation=True,
|
66 |
)
|
67 |
|
68 |
+
def clean_sequence(text):
|
69 |
+
# Remove the "<|endoftext|>" token
|
70 |
+
text = text.replace("<|endoftext|>", "")
|
71 |
+
|
72 |
# Remove newline characters and non-alphabetical characters
|
73 |
+
text = "".join(char for char in text if char.isalpha())
|
74 |
+
|
75 |
+
return text
|
76 |
+
|
77 |
+
# Print the generated sequences
|
78 |
+
for i, seq in enumerate(generated_sequences):
|
79 |
+
cleaned_text = clean_sequence(seq["generated_text"])
|
80 |
print(f">Seq_{i}")
|
81 |
+
print(cleaned_text)
|
82 |
```
|
83 |
|
84 |
### Use Cases
|