sagar007 committed
Commit aaa17ba · verified · 1 Parent(s): 25893d0

Update app.py

Files changed (1):
  app.py +53 -70
app.py CHANGED
@@ -3,31 +3,9 @@ import torch.nn as nn
 from torch.nn import functional as F
 import tiktoken
 import gradio as gr
-import torch
-import torch.nn as nn
-from torch.nn import functional as F
-import tiktoken
-import gradio as gr
-import asyncio
-import gradio as gr
 import asyncio
 
-# Add the post-processing function here
-def post_process_text(text):
-    # Ensure the text starts with a capital letter
-    text = text.capitalize()
-
-    # Remove any incomplete sentences at the end
-    sentences = text.split('.')
-    complete_sentences = sentences[:-1] if len(sentences) > 1 else sentences
-
-    # Rejoin sentences and add a period if missing
-    processed_text = '. '.join(complete_sentences)
-    if not processed_text.endswith('.'):
-        processed_text += '.'
-
-    return processed_text
-# Define the model architecture
+# Model Configuration
 class GPTConfig:
     def __init__(self):
        self.block_size = 1024
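
A note on the relocated helper: its logic is unchanged by the move, but two behaviors are easy to miss in review. str.capitalize() lowercases every character after the first (including proper nouns in later sentences), and any fragment after the last period is dropped. A quick standalone check with a hypothetical input string:

# Standalone behavior check; the input string is hypothetical.
def post_process_text(text):
    text = text.capitalize()
    sentences = text.split('.')
    complete_sentences = sentences[:-1] if len(sentences) > 1 else sentences
    processed_text = '. '.join(complete_sentences)
    if not processed_text.endswith('.'):
        processed_text += '.'
    return processed_text

print(post_process_text("the GPT model streams tokens. it works well. trailing fragm"))
# -> "The gpt model streams tokens.  it works well."
#    Note the lowercased "gpt", the dropped trailing fragment, and the
#    double space introduced by joining on '. ' while each segment keeps
#    its own leading space.
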
@@ -36,6 +14,7 @@ class GPTConfig:
         self.n_head = 12
         self.n_embd = 768
 
+# Causal Self-Attention
 class CausalSelfAttention(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -43,7 +22,6 @@ class CausalSelfAttention(nn.Module):
         self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd)
         self.c_proj = nn.Linear(config.n_embd, config.n_embd)
         self.n_head = config.n_head
-        self.n_embd = config.n_embd
         self.register_buffer("bias", torch.tril(torch.ones(config.block_size, config.block_size)).view(1, 1, config.block_size, config.block_size))
 
     def forward(self, x):
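
The bias buffer registered above is a lower-triangular (1, 1, T, T) mask that broadcasts across the batch and head dimensions. The forward body that consumes it is elided from this hunk; below is a minimal sketch of the conventional use, where the masked_fill step is an assumption, not something shown in this diff:

import torch
import torch.nn.functional as F

T = 4  # tiny block size for illustration
bias = torch.tril(torch.ones(T, T)).view(1, 1, T, T)  # same construction as the buffer
att = torch.randn(1, 1, T, T)                         # raw attention scores (B, nh, T, T)
att = att.masked_fill(bias[:, :, :T, :T] == 0, float('-inf'))  # hide future positions
att = F.softmax(att, dim=-1)                          # each row now weights only positions <= t
print(att[0, 0])  # strictly upper-triangular entries are exactly 0
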
@@ -56,6 +34,7 @@ class CausalSelfAttention(nn.Module):
         y = y.transpose(1, 2).contiguous().view(B, T, C)
         return self.c_proj(y)
 
+# Multi-Layer Perceptron
 class MLP(nn.Module):
     def __init__(self, config):
         super().__init__()
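
For context, the MLP's __init__ is elided from this hunk; only the forward chain c_fc -> gelu -> c_proj is visible below. A sketch of the likely layer shapes, assuming the standard GPT-2 choice of a 4x hidden expansion (the 4x factor is an assumption, not confirmed by this diff):

import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, config):
        super().__init__()
        # 4 * n_embd is the conventional GPT-2 width; assumed, not shown in the diff
        self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd)
        self.gelu = nn.GELU()
        self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd)

    def forward(self, x):
        return self.c_proj(self.gelu(self.c_fc(x)))
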
@@ -66,6 +45,7 @@ class MLP(nn.Module):
     def forward(self, x):
         return self.c_proj(self.gelu(self.c_fc(x)))
 
+# Transformer Block
 class Block(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -79,6 +59,7 @@ class Block(nn.Module):
         x = x + self.mlp(self.ln_2(x))
         return x
 
+# GPT Model
 class GPT(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -121,15 +102,17 @@ class GPT(nn.Module):
 
         return logits, loss
 
-# Load the model
+# Load Model
 def load_model(model_path):
     config = GPTConfig()
     model = GPT(config)
-
-    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
-
-    print("Checkpoint keys:", checkpoint.keys())  # Debug print
-
+    try:
+        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))  # Load on CPU first
+    except FileNotFoundError:
+        raise FileNotFoundError(f"Model file not found at: {model_path}")
+    except Exception as e:
+        raise Exception(f"Error loading model: {e}")
+
     if 'model_state_dict' in checkpoint:
         model.load_state_dict(checkpoint['model_state_dict'])
     else:
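
The rewritten loader accepts two checkpoint layouts: a dict wrapping the weights under a 'model_state_dict' key, or a bare state_dict. A minimal sketch of producing each, assuming a constructed GPT model is in scope (file names are placeholders):

import torch

# Wrapped layout: exercises the 'model_state_dict' branch of load_model.
torch.save({'model_state_dict': model.state_dict()}, 'gpt_model.pth')

# Bare layout: exercises the else branch.
torch.save(model.state_dict(), 'gpt_model_raw.pth')
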
@@ -138,59 +121,59 @@ def load_model(model_path):
     model.eval()
     return model
 
-# Load the model
-model = load_model('gpt_model.pth')  # Replace with the actual path to your .pt file
-enc = tiktoken.get_encoding('gpt2')
 
-# Improved text generation function
-import torch
-import torch.nn as nn
-from torch.nn import functional as F
-import tiktoken
-import gradio as gr
-
-# [Your existing model code remains unchanged]
+# Text Post-processing
+def post_process_text(text):
+    text = text.capitalize()
+    sentences = text.split('.')
+    complete_sentences = sentences[:-1] if len(sentences) > 1 else sentences
+    processed_text = '. '.join(complete_sentences)
+    if not processed_text.endswith('.'):
+        processed_text += '.'
+    return processed_text
 
-# Modify the generate_text function to be asynchronous
+# Text Generation Function (Asynchronous)
 async def generate_text(prompt, max_length=432, temperature=0.8, top_k=40):
-    input_ids = torch.tensor(enc.encode(prompt)).unsqueeze(0)
+    enc = tiktoken.get_encoding('gpt2')
+    input_ids = torch.tensor(enc.encode(prompt)).unsqueeze(0).to(device)
     generated = []
-
+
     with torch.no_grad():
         for _ in range(max_length):
-            outputs, _ = model(input_ids)
-            next_token_logits = outputs[:, -1, :]
-            next_token_logits = next_token_logits / temperature
-            top_k_logits, top_k_indices = torch.topk(next_token_logits, top_k, dim=-1)
-            next_token_probs = F.softmax(top_k_logits, dim=-1)
-            next_token_index = torch.multinomial(next_token_probs, num_samples=1)
-            next_token = top_k_indices.gather(-1, next_token_index)
-
-            input_ids = torch.cat([input_ids, next_token], dim=-1)
-            generated.append(next_token.item())
-
-            next_token_str = enc.decode([next_token.item()])
-            yield next_token_str
-
-            if next_token.item() == enc.encode('\n')[0] and len(generated) > 100:
-                break
-
-            await asyncio.sleep(0.02)  # Slightly faster typing effect
+            try:
+                outputs, _ = model(input_ids)
+                next_token_logits = outputs[:, -1, :]
+                next_token_logits = next_token_logits / temperature
+                top_k_logits, top_k_indices = torch.topk(next_token_logits, top_k, dim=-1)
+                next_token_probs = F.softmax(top_k_logits, dim=-1)
+                next_token_index = torch.multinomial(next_token_probs, num_samples=1)
+                next_token = top_k_indices.gather(-1, next_token_index)
+
+                input_ids = torch.cat([input_ids, next_token], dim=-1)
+                generated.append(next_token.item())
+
+                next_token_str = enc.decode([next_token.item()])
+                yield next_token_str
+
+                if next_token.item() == enc.encode('\n')[0] and len(generated) > 100:
+                    break
 
-    if len(generated) == max_length:
-        yield "... (output truncated due to length)"
-# Modify the gradio_generate function to be asynchronous
+                await asyncio.sleep(0.02)  # For typing effect
+
+            except Exception as e:
+                yield f"Error during generation: {e}"
+                return
+
+# Gradio Generate Function
 async def gradio_generate(prompt, max_length, temperature, top_k):
     output = ""
     async for token in generate_text(prompt, max_length, temperature, top_k):
         output += token
         yield output
+    output = post_process_text(output)
+    yield output
 
+# Load the model (replace with your model path
 
-# Custom CSS for the animation effect
-import gradio as gr
-import asyncio
-
-# Your existing imports and model code here...
 
 css = """
 <style>
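
One thing to flag in this hunk: the new input_ids line calls .to(device), and the generation loop still calls a module-level model, but the old module-level load_model(...) call is deleted and its replacement ends in a truncated comment. Both names must still be defined somewhere in app.py; a sketch of the usual definitions (hypothetical, not shown in this diff):

import torch

# Assumed module-level setup; the diff only shows the truncated comment
# "# Load the model (replace with your model path".
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = load_model('gpt_model.pth').to(device)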
 
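The diff ends inside the css block, so the UI wiring is not shown. For orientation, an async generator handler like gradio_generate streams into a Gradio app roughly as below; the Blocks layout and component names here are invented, and only the handler signature comes from the diff:

with gr.Blocks(css=css) as demo:
    prompt = gr.Textbox(label="Prompt")
    max_length = gr.Slider(50, 432, value=432, step=1, label="Max length")
    temperature = gr.Slider(0.1, 1.5, value=0.8, label="Temperature")
    top_k = gr.Slider(1, 100, value=40, step=1, label="Top-k")
    output = gr.Textbox(label="Generated text")
    generate_btn = gr.Button("Generate")
    # Gradio streams each value yielded by the async generator into `output`.
    generate_btn.click(gradio_generate,
                       inputs=[prompt, max_length, temperature, top_k],
                       outputs=output)

demo.launch()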