TheAutonomous committed
Commit 0f9b91a • 1 Parent(s): fd71db7
Upload 4 files

- Inference.py +46 -0
- TrainData.txt +118 -0
- __init__.py +64 -0
- cached_lm_GPT2Tokenizer_128_TrainData.txt +0 -0
Inference.py
ADDED
@@ -0,0 +1,46 @@
+import os, time, torch, warnings
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+class Inference:
+
+    def __init__(self, silent=False) -> None:
+        start_time = time.perf_counter()
+        self.tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
+        # Load the fine-tuned weights saved next to this file by the trainer.
+        self.model = GPT2LMHeadModel.from_pretrained(self.local_file_path("SaveState"))
+        self.model.eval()
+        if not silent:
+            print(f"Model loading took {time.perf_counter() - start_time} seconds")
+
+    def local_file_path(self, path):
+        return os.path.join(os.path.dirname(os.path.abspath(__file__)), path)
+
+    def generate(self, prompt, max_length=2000, temperature=0.5, do_sample=True, stop_token=None, callback=None, silent=True):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            start_time = time.perf_counter()
+            input_ids = self.tokenizer.encode(prompt, return_tensors='pt')
+            generated_text = input_ids
+            # Generate in chunks of up to 50 tokens so the callback can stream output.
+            while generated_text.shape[1] < max_length:
+                length = min(50, max_length - generated_text.shape[1])
+                with torch.no_grad():
+                    # max_new_tokens counts only fresh tokens; the original max_length=length
+                    # also counted the prompt, which stalled generation after the first chunk.
+                    outputs = self.model.generate(input_ids, max_new_tokens=length, temperature=temperature, do_sample=do_sample, pad_token_id=self.tokenizer.eos_token_id)
+                new_tokens = outputs[0][input_ids.shape[1]:]
+                if new_tokens.numel() == 0:
+                    break  # model stopped early; avoid looping forever
+                if callback is not None:
+                    for token in new_tokens:
+                        callback(self.tokenizer.decode([token]))
+                generated_text = torch.cat((generated_text, new_tokens.unsqueeze(0)), dim=-1)
+                # Only the newest chunk is fed back as context, which keeps inputs short
+                # at the cost of forgetting anything older than 50 tokens.
+                input_ids = new_tokens.unsqueeze(0)
+                if stop_token is not None and stop_token in self.tokenizer.decode(generated_text[0]):
+                    break
+            if not silent:
+                print(f"Generation took {time.perf_counter() - start_time} seconds")
+            return self.tokenizer.decode(generated_text[0], skip_special_tokens=True)
+
+# Module-level singleton: the class name is shadowed on purpose so that
+# `from Inference import Inference` hands importers a ready-to-use instance.
+Inference = Inference()
+
+def spec(stre):
+    # Minimal streaming callback: print each decoded token as it arrives.
+    print(stre, end="")
+
+if __name__ == "__main__":
+    while True:
+        print(Inference.generate(input(">>> "), max_length=100, temperature=0.8, silent=True))
TrainData.txt
ADDED
@@ -0,0 +1,118 @@
+Scene: Sunshine
+Person1: Ahh beautiful sunshine
+Person2: I love the way it bounces off your - beautiful face
+Person1: You were going to say something else there
+Person2: I resisted the bald joke because it's still quite early in the show but now you've opened the door so here it goes, you have no hair
+Person1: I know I don't have any hair, but no we should not fight. It is our anniversary night.
+Person2: It is our anniversary
+Person1: Yes
+Person2: And we always come to this picnic spot for our anniversary in the sunshine
+Person1: Now come, let us eat some bread and cheese.
+Person2: For we are...
+Both: French
+Person1: Thank You
+Person2: To us
+Person1: To us
+Both: Clink
+Person2: So how's your affair going
+Person1: ha ha
+Person2: I know. *Spits out wine*
+Person1: What did you just do?
+Person2: I kept the wine in my mouth the whole time because I was going to dramatically reveal that it was poisoned. You're going to die in the sunshine for what you did. I know about Jean Claude.
+Person1: Jean Claude!? Jean Claude and I are just buddies!
+Person2: Buddies? More like sex buddies! I turn the corner of Bakers Street and there you two were raw dogging it in the streets!
+Person1: No we weren't! We were just having good times together having a play roll!
+Person2: A play roll!? You think that I am that naive?
+Person1: Oh-
+Person2: Wow this is a very slow acting poison
+Person3: OH MY GOODNESS!! THAT MAN IS BALD!
+**End**
+Scene: Territory
+Person1: Easy there, you're about to step on my territory. You actually have.
+Person2: Well I see I've made some kind of mistake
+Person1: Well all you gotta do is just wander back over there and we ain't got no problems.
+Person2: I just gotta wander back over there and we ain't gonna have a problem?
+Person1: Oh what, are you repeating everything I'm saying? What, is there an echo in here?
+Person2: I JUST WANNA BE REAL CLEAR ON WHATS I GOTS TO DO! I JUST HAVE TO WALK OVER THERE AND WE AIN'T GONNA HAVE NO MORE PROBLEMS, IS THAT WHAT YA SAID!?
+Person1: I ain't saying we're gonna not have no problems, I mean we still is gonna have taxation and relationship troubles. I'm saying the problem of me having a gun pointed on you is gonna diminish awful quick like
+Person2: So you were being over-generalized before when you said we ain't gonna have no problems, you meant this new specific problem will be gone but we'll still have problems such as inflation.
+Person1: I apologize for not being hyper-specific with my initial sentence. I will try to do better in the future. Now please move exactly two meters to your left...
+Person2: I UNDERSTAND YOUR MISTAKE, THAT RHETORIC CAN BE A TRICKY MISTRESS. I WANT TO UNDERSTAND PRECISELY WHAT WILL OCCUR IF I'M TO STEP BACK OVER THERE?
+Person3: Hey if you go too far you'll be on my property and I don't want there to be any issues because that's my-
+Person2: DO YOU UNDERSTAND WHAT HE'S SAYIN?
+Person1: I don't give a f*** about what he's saying.
+Person2: We have no idea what you're sayin, you made this a 3 body problem here which is an unsolvable mathematical issue.
+Person3: I'm just saying that this is my turf right here okay-
+Person2: only you are on my best I do not see why
+Person1: When you say that's your turf do you only own the turf or do you own the land underneath it? If we don't go on the turf are we still allowed over there or is it a property line? Be more specific!
+**End**
+Scene: Be More Specific
+Person1: Just be more specific about it.
+Person2: Okay okay so... like...
+Person1: C'mon!
+Person2: I'm leaving for work, right? I hear a knock at the door and it's the delivery driver and I say "Oh hello!" and he says "Hello!" and he is delivering a parcel and he says it's for me and he hands me a stylus to sign for it and my hand slips and the stylus goes through the device into his heart and now he's dead.
+Person1: Okay, you're under arrest.
+**End**
+Scene: Hunting
+Person1: I've been hunting wild and exotic animals for about two weeks now and I realize I really should have prepared a lot more, as you can see I've lost me legs and me arms. Turns out crocodiles, even if they look like they're smiling, aren't actually your friends. Thanks for coming to the TED Talk. Don't f*** with nature 'cause nature f***s back. Now what I wanna recommend for anyone who wants to do what I did is do your research. Do your reading and start with something small like a doll house. Very unlikely a doll house is going to give you such grievous bodily injuries. Next slide!
+Person2: *Snaps Fingers*
+Person1: There's Jim, he's my assistant. Since the incident I can't click anymore so we made a click-activated PowerPoint presentation. As you can see this is a photo of the incident, actually about 2 seconds before, and I am now waving at the crocodile in the water.
+Person2: *Snaps Fingers*
+Person1: Now I'm really in the water. As you can see one of the arms is gone. Why did I not run away then, you might ask? I've always been a stubborn man.
+**End**
+Scene: It's Okay
+Person1: Hey it's okay
+Person2: I just um... I just need this time.
+Person1: You want us to leave?
+Person2: Yeah
+Person1: *Leaves*
+Person3: *Leaves*
+Person2: No wait don't leave.
+Person3: Listen mate it will be okay
+Person2: I don't want to be in a solo scene
+Person1: That was my bad mate, I really thought someone might come in at some point
+Person2: I just... sometimes it's funny just to let someone off stage
+Person3: Listen it's alright if you can't think of anything
+Person1: I know it's your first time doing improv, Person2
+Person2: It's just I've run out of ideas
+Person1: Well I am sure you can think of something
+Person2: You just have to be funny every moment and it's just f***ing exhausting. Plus we are running out of accents, we've done Australian, German, French, British...
+Person3: Don't do the offensive ones. Leave those in the nineties
+Person2: I don't know mate, I think I should just quit.
+Person1: We don't want you to go... We just gotta do a couple more games in this half, then have a break and a lemonade and just come back for the second half, alright?
+**End**
+Scene: Digging
+Person1: That's good, Laddy. Listen, we need to get these bodies in here by the time the police come, alright
+Person2: and these
+Person1: and th- f***in' 'ell how many people?
+Person2: sixpence word 6 pennies
+Person1: My goodness. We've robbed this bank okay and it's gone a bit wrong there's some- we've killed a few people all right this is...
+Person3: violence criminel
+Person1: I know this is what we are. We're violent criminals. We are violent criminals! Person3 are you alright?
+Person3: Volie destruction
+Person1: I know we blew up that bank
+Person3: Ah sense negative
+Person1: let's not- I know you put-
+Person2: Do you understand it or do you find it confusing?
+Person3: Oh! Le blue fance!
+Person1: Oh my goodness the police!! Get down! It's the police!
+Person2: She glanced out of the window
+Person1: No no! They're going past, just keep quiet!
+Person2: MONEY!
+Person3: VIOLENCE CRIMINEL!
+Person1: NO NO! SHUSH!
+Person2: I EXPECT YOU'LL BE WANTING SOME CHANGE
+Person1: Listen, listen, guys keep your f***ing voices down.
+Person2: MONEYYYYYY!!!!
+Person1: Okay I think they've gone. You want money? Fine here's your money
+Person2: Seventy Seven
+Person1: Seventy Seven pounds. Here you go. Congratulations. And for you-
+Person2: 77!?
+Person1: That's what we agreed-
+Person3: TYRANNY
+Person2: 77!?
+Person1: Guys. Listen, it wasn't the biggest bank robbery in the world. We robbed 100 quid. Alright? You get 77
+Person2: Money?
+Person1: I'll get 10 and Person3 gets whatever the rest of the maths is.
+Person3: Uhhhh... 108.
+**End**
__init__.py
ADDED
@@ -0,0 +1,64 @@
+import os
+import time
+import torch
+from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
+
+class GptHumorTrainer:
+
+    def __init__(self, silent=False) -> None:
+        start_time = time.perf_counter()
+        self.tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
+        # Resume from the previously saved fine-tuned weights.
+        self.model = GPT2LMHeadModel.from_pretrained(self.local_file_path("SaveState"))
+        self.model.eval()
+        if not silent:
+            print(f"Model loading took {time.perf_counter() - start_time} seconds")
+
+    def local_file_path(self, path):
+        return os.path.join(os.path.dirname(os.path.abspath(__file__)), path)
+
+    def train(self, train_file, epochs=3):
+        device = torch.device("cpu")
+        self.model.to(device)
+
+        # Prepare the dataset: TextDataset chunks the file into fixed-size token
+        # blocks and caches them next to the source file.
+        train_dataset = TextDataset(
+            tokenizer=self.tokenizer,
+            file_path=train_file,
+            block_size=128,
+        )
+
+        # We use a special data collator for language modeling tasks
+        data_collator = DataCollatorForLanguageModeling(
+            tokenizer=self.tokenizer,
+            mlm=False,  # causal LM, not masked LM
+        )
+
+        for epoch in range(epochs):
+            # Define the training arguments for each epoch
+            training_args = TrainingArguments(
+                output_dir=f"./results/epoch_{epoch+1}",  # The output directory for this epoch
+                overwrite_output_dir=True,  # Overwrite the content of the output directory
+                num_train_epochs=1,  # Train for 1 epoch at a time; the outer loop handles the rest
+                per_device_train_batch_size=3,  # Batch size for training
+                save_steps=-1,  # Disable step-based checkpointing; we save manually below
+                save_total_limit=None,  # No limit on the total amount of checkpoints
+                prediction_loss_only=True,  # Focus on the prediction loss only
+            )
+
+            # Initialize the Trainer
+            trainer = Trainer(
+                model=self.model,
+                args=training_args,
+                data_collator=data_collator,
+                train_dataset=train_dataset,
+            )
+
+            # Train the model for one epoch
+            trainer.train()
+
+            # Save the model after each epoch
+            self.model.save_pretrained(self.local_file_path("SaveState"))
+
+if __name__ == "__main__":
+    humor_trainer = GptHumorTrainer()
+    humor_trainer.train(humor_trainer.local_file_path("TrainData.txt"), epochs=5)  # Replace with the path to your training file
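
For reference, the cached binary added below is TextDataset's preprocessed copy of TrainData.txt. A rough sketch of the chunking it performs, assuming TrainData.txt is in the working directory (this approximates the library's internals; it is not code from this commit):

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
with open("TrainData.txt", encoding="utf-8") as f:
    ids = tokenizer.encode(f.read())  # tokenize the whole file at once

# Slice into consecutive 128-token blocks, dropping the final partial block;
# this is roughly what TextDataset(block_size=128) feeds the Trainer.
block_size = 128
blocks = [ids[i:i + block_size] for i in range(0, len(ids) - block_size + 1, block_size)]
print(f"{len(ids)} tokens -> {len(blocks)} blocks of {block_size}")
# TextDataset pickles these blocks as cached_lm_GPT2Tokenizer_128_TrainData.txt,
# the binary file included in this commit.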
cached_lm_GPT2Tokenizer_128_TrainData.txt
ADDED
Binary file (5.75 kB)