Librezo
/

bog-001

+import torch
+import numpy as np
+import scipy
+# Define the hyperparameters
+# These module-level values give the shape of the initial hidden state that
+# `evaluate` builds: torch.zeros(num_layers, batch_size, hidden_dim).
+num_layers = 2  # first dimension of each hidden-state tensor
+batch_size = 32  # second dimension of each hidden-state tensor
+hidden_dim = 256  # third dimension of each hidden-state tensor
+def random_rotation(inputs):
+    """Rotate `inputs` by a random angle drawn uniformly from [-180, 180) degrees.
+
+    The rotation is delegated to `scipy.ndimage.rotate` with `reshape=False`,
+    so the result keeps the shape of the input. No `axes` argument is passed,
+    so SciPy's default rotation plane is used.
+    NOTE(review): for batched arrays the default plane may not be the spatial
+    one -- confirm the array layout callers pass in.
+
+    Args:
+        inputs: Array-like accepted by `scipy.ndimage.rotate`.
+
+    Returns:
+        The rotated data as returned by `scipy.ndimage.rotate`.
+    """
+    # `import scipy` alone does not guarantee that the `ndimage` submodule is
+    # loaded on every SciPy version, so import it explicitly.
+    from scipy import ndimage
+
+    angle = np.random.uniform(-180, 180)
+    return ndimage.rotate(inputs, angle, reshape=False)
+def random_scaling(inputs):
+    """Resample `inputs` by a random zoom factor drawn uniformly from [0.8, 1.2).
+
+    The same factor is handed to `scipy.ndimage.zoom` as a scalar, so every
+    axis is zoomed and the returned array generally has a different shape
+    from the input.
+    NOTE(review): if callers need a fixed output size, a crop/pad step is
+    still missing -- confirm what downstream code expects.
+
+    Args:
+        inputs: Array-like accepted by `scipy.ndimage.zoom`.
+
+    Returns:
+        The zoomed data as returned by `scipy.ndimage.zoom`.
+    """
+    # `import scipy` alone does not guarantee that the `ndimage` submodule is
+    # loaded on every SciPy version, so import it explicitly.
+    from scipy import ndimage
+
+    scale = np.random.uniform(0.8, 1.2)
+    return ndimage.zoom(inputs, scale)
+def random_translation(inputs):
+    """Shift `inputs` by a random offset drawn uniformly from [-0.2, 0.2).
+
+    The offset is passed to `scipy.ndimage.shift` as a scalar, so the same
+    shift, measured in array elements, is applied along every axis. The
+    output shape equals the input shape.
+    NOTE(review): a shift of at most 0.2 elements is barely visible; if a
+    fraction of the image size was intended, the value must be scaled by the
+    axis length -- confirm the intent.
+
+    Args:
+        inputs: Array-like accepted by `scipy.ndimage.shift`.
+
+    Returns:
+        The shifted data as returned by `scipy.ndimage.shift`.
+    """
+    # `import scipy` alone does not guarantee that the `ndimage` submodule is
+    # loaded on every SciPy version, so import it explicitly.
+    from scipy import ndimage
+
+    shift = np.random.uniform(-0.2, 0.2)
+    return ndimage.shift(inputs, shift)
+def random_shearing(inputs):
+    """Shear `inputs` by a random factor drawn uniformly from [-0.2, 0.2).
+
+    SciPy has no `ndimage.shear` function, so the shear is expressed as an
+    affine transform. It acts in the plane of the first two axes (the plane
+    `scipy.ndimage.rotate` uses by default) and keeps the centre of the
+    array fixed. Any further axes are left untouched.
+
+    Args:
+        inputs: Array-like with at least two dimensions.
+
+    Returns:
+        numpy.ndarray with the same shape as `inputs`.
+
+    Raises:
+        ValueError: If `inputs` has fewer than two dimensions.
+    """
+    # `import scipy` alone does not guarantee that the `ndimage` submodule is
+    # loaded on every SciPy version, so import it explicitly.
+    from scipy import ndimage
+
+    shear = np.random.uniform(-0.2, 0.2)
+    array = np.asarray(inputs)
+    if array.ndim < 2:
+        raise ValueError('random_shearing needs an input with at least 2 dimensions')
+    # Identity everywhere except one off-diagonal term: the coordinate along
+    # axis 0 is displaced in proportion to the coordinate along axis 1.
+    matrix = np.eye(array.ndim)
+    matrix[0, 1] = shear
+    # Choose the offset so that the centre maps onto itself; without it the
+    # shear would pivot around the array origin.
+    centre = (np.array(array.shape) - 1) / 2.0
+    offset = centre - matrix.dot(centre)
+    return ndimage.affine_transform(array, matrix, offset=offset)
+def random_flipping(inputs):
+    """Reverse `inputs` along axis 1 with probability 0.5.
+
+    SciPy has no `ndimage.flip` function, so the original call always raised
+    `AttributeError`; `numpy.flip` is used instead. The flip is now decided
+    by a coin toss so the function is random, as its name says.
+
+    Args:
+        inputs: Array-like with at least two dimensions.
+
+    Returns:
+        numpy.ndarray with the same shape as `inputs`; either an unchanged
+        array or a contiguous copy reversed along axis 1.
+    """
+    array = np.asarray(inputs)
+    if np.random.uniform() < 0.5:
+        # `np.flip` returns a negatively-strided view; copy so the result can
+        # be handed to libraries that reject negative strides.
+        return np.flip(array, axis=1).copy()
+    return array
+def data_augmentation(inputs):
+    """Run `inputs` through every augmentation step and return the result.
+
+    The steps run in a fixed order -- rotation, scaling, translation,
+    shearing, flipping -- and each step receives the previous step's output.
+    """
+    pipeline = (
+        random_rotation,
+        random_scaling,
+        random_translation,
+        random_shearing,
+        random_flipping,
+    )
+    augmented = inputs
+    for step in pipeline:
+        augmented = step(augmented)
+    return augmented
+def evaluate(model, test_data, hyperparameters=None, recurrent_network=False,
+             pre_trained_model=False, fine_tuning=False, augment=True):
+    """Return the classification accuracy of `model` on `test_data`, in percent.
+
+    Args:
+        model: Callable model. It is switched to eval mode and run without
+            gradient tracking.
+        test_data: Iterable of `(inputs, labels)` batches.
+        hyperparameters: Passed to `model.fine_tune` when `fine_tuning` is
+            set; unused otherwise, so it is optional.
+        recurrent_network: Call the model as `model(inputs, hidden)` with a
+            zero initial hidden state sized by the module-level
+            `num_layers`, `batch_size` and `hidden_dim`.
+        pre_trained_model: Take the outputs from
+            `model.forward_from_pretrained(inputs)`.
+        fine_tuning: Take the outputs from
+            `model.fine_tune(inputs, hyperparameters)`.
+        augment: Run `data_augmentation` on every batch before the forward
+            pass (the original behaviour). Pass `False` for a deterministic
+            evaluation on the unmodified inputs.
+
+    Returns:
+        Accuracy as a percentage, or 0.0 when `test_data` yields no samples.
+
+    When several mode flags are set, the precedence is `fine_tuning`, then
+    `pre_trained_model`, then `recurrent_network`. This matches which result
+    the original code ended up using, without running the forward passes
+    whose outputs were discarded.
+    """
+    # Put the inputs where the model's weights live. Picking CUDA whenever it
+    # is available breaks if the caller left the model on the CPU.
+    try:
+        device = next(model.parameters()).device
+    except (AttributeError, StopIteration):
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    hidden = None
+    if recurrent_network and not (pre_trained_model or fine_tuning):
+        # Only the plain recurrent path consumes the hidden state.
+        hidden = (torch.zeros(num_layers, batch_size, hidden_dim).to(device),
+                  torch.zeros(num_layers, batch_size, hidden_dim).to(device))
+    model.eval()
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for inputs, labels in test_data:
+            if augment:
+                # The augmentation helpers return NumPy arrays; convert back
+                # to a tensor, keeping the original dtype when there is one.
+                dtype = inputs.dtype if torch.is_tensor(inputs) else None
+                inputs = torch.as_tensor(data_augmentation(inputs), dtype=dtype)
+            inputs = inputs.to(device)
+            labels = labels.to(device)
+            if fine_tuning:
+                outputs = model.fine_tune(inputs, hyperparameters)
+            elif pre_trained_model:
+                outputs = model.forward_from_pretrained(inputs)
+            elif recurrent_network:
+                outputs = model(inputs, hidden)
+            else:
+                outputs = model(inputs)
+            _, predicted = torch.max(outputs, 1)
+            total += labels.size(0)
+            correct += (predicted == labels).sum().item()
+    if total == 0:
+        # No samples: report 0.0 instead of dividing by zero.
+        return 0.0
+    return 100 * correct / total
+def adjust_learning_rate(optimizer, epoch, initial_lr=0.001, decay=0.1, step=30):
+    """Sets the learning rate to `initial_lr` decayed by `decay` every `step` epochs.
+
+    With the defaults this is the original schedule: 0.001 multiplied by 0.1
+    for every 30 completed epochs.
+
+    Args:
+        optimizer: Object exposing `param_groups`, an iterable of dicts whose
+            `'lr'` entry is overwritten.
+        epoch: Zero-based epoch index.
+        initial_lr: Learning rate used during the first `step` epochs.
+        decay: Multiplicative factor applied once per `step` epochs.
+        step: Number of epochs between two decays; must be positive.
+
+    Raises:
+        ValueError: If `step` is not positive.
+    """
+    if step <= 0:
+        raise ValueError('step must be a positive number of epochs')
+    lr = initial_lr * (decay ** (epoch // step))
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr

main.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchtext import data
+from gensim.corpora import WikiCorpus
+from transformers import GPT2Tokenizer, GPT2Model
+from functions import *
+# Define the model
+# class GPT(nn.Module):
+#     def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
+#         super().__init__()
+#         self.embedding = nn.Embedding(vocab_size, embedding_dim)
+#         self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
+#         self.fc = nn.Linear(hidden_dim, vocab_size)
+#         self.gpt2 = model
+#     def forward(self, x):
+#         # Embed the input
+#         x = self.embedding(x)
+#         # Pass through the GPT2 model
+#         x = self.gpt2(x)
+#         # Pass through the LSTM
+#         x, _ = self.lstm(x)
+#         # Pass through the fully connected layer
+#         x = self.fc(x)
+#         return x
+# Training script: fine-tunes GPT-2 plus a small linear head on a handful of
+# French Wikipedia articles, using the placeholder labels defined below.
+# `islice` caps how many articles are read from the dump.
+from itertools import islice
+# Load the GPT2 model
+print('load gpt2 model')
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+# GPT-2 ships without a padding token; reuse EOS so that documents of
+# different lengths can be padded into one batch.
+tokenizer.pad_token = tokenizer.eos_token
+model = GPT2Model.from_pretrained('gpt2')
+# GPT2Model returns hidden states, not class scores. CrossEntropyLoss needs
+# one score per class, so a linear head maps the pooled hidden state to the
+# two classes that occur in `labels`.
+classifier = nn.Linear(model.config.hidden_size, 2)
+# Placeholder targets, one per training document.
+labels = torch.tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 1])
+# Load the data
+print('load custom data')
+# wiki_corpus_en = WikiCorpus('data/enwiki-latest-pages-articles.xml.bz2')
+# `dictionary={}` stops WikiCorpus from scanning the whole dump to build a
+# vocabulary; GPT-2's own tokenizer is used instead.
+wiki_corpus_fr = WikiCorpus('data/frwiki-latest-pages-articles.xml.bz2', dictionary={})
+# stackoverflow_corpus = data.TabularDataset('data/stackoverflow.csv', format='csv', fields=['text'])
+# Preprocess the data
+print('Preprocess the data')
+# Read only as many articles as there are labels. `get_texts()` yields each
+# article as a list of tokens, which is joined back into one string.
+# Materialising the full dump in a list would exhaust memory.
+wiki_data_fr = [' '.join(tokens) for tokens in islice(wiki_corpus_fr.get_texts(), len(labels))]
+if len(wiki_data_fr) != len(labels):
+    raise ValueError('expected {} articles but the corpus yielded {}'.format(len(labels), len(wiki_data_fr)))
+# Convert the data to a format compatible with PyTorch
+print('Convert the data to a format compatible with PyTorch')
+# Raw text cannot be turned into a tensor directly; the tokenizer produces
+# padded `input_ids` and the matching `attention_mask`.
+batch = tokenizer(wiki_data_fr, return_tensors='pt', padding=True, truncation=True, max_length=128)
+def classify(encoded):
+    """Return class scores, one row per document, for a tokenized batch."""
+    hidden = model(**encoded).last_hidden_state
+    # Mean-pool over real tokens only, so padding does not dilute the result.
+    mask = encoded['attention_mask'].unsqueeze(-1).to(hidden.dtype)
+    pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
+    return classifier(pooled)
+# Define the Adam optimizer
+print('Define the Adam optimizer')
+optimizer = optim.Adam(list(model.parameters()) + list(classifier.parameters()), lr=0.001)
+# Define the loss function
+print('Define the loss function')
+criterion = nn.CrossEntropyLoss()
+# Train the model
+print('Train the model')
+num_epochs=10
+for epoch in range(num_epochs):
+    # `epoch` is an int, so it must be converted before concatenation.
+    print('epoch: ' + str(epoch))
+    model.train()
+    # Reset the gradients before the backward pass
+    optimizer.zero_grad()
+    # Forward pass
+    outputs = classify(batch)
+    # Calculate the loss
+    loss = criterion(outputs, labels)
+    # Backward pass
+    loss.backward()
+    # Update the parameters
+    optimizer.step()
+    # Evaluate the model. Accuracy is computed inline on the training batch:
+    # the `evaluate` helper expects an iterable of (inputs, labels) batches
+    # and applies array augmentations that are meaningless for token ids.
+    model.eval()
+    with torch.no_grad():
+        predicted = classify(batch).argmax(dim=1)
+    accuracy = 100 * (predicted == labels).sum().item() / labels.size(0)
+    # Save the model weights and states
+    torch.save(model.state_dict(), 'model.pth')
+    torch.save(classifier.state_dict(), 'classifier.pth')
+    # Adjust the learning rate
+    adjust_learning_rate(optimizer, epoch)
+    # Print the loss and accuracy
+    print('Epoch: {}, Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch+1, loss.item(), accuracy))