Update app.py
app.py CHANGED
@@ -129,13 +129,15 @@ def load_model(model_path):
     model.to(device)
     return model
 
-#
-model = load_model('gpt_model.pth')
+# Don't load the model here
+# model = load_model('gpt_model.pth')
 enc = tiktoken.get_encoding('gpt2')
 
 # Update the generate_text function
-@spaces.GPU(duration=60)
+@spaces.GPU(duration=60)
 async def generate_text(prompt, max_length=432, temperature=0.8, top_k=40):
+    # Load the model inside the GPU-decorated function
+    model = load_model('gpt_model.pth')
     device = next(model.parameters()).device
     input_ids = torch.tensor(enc.encode(prompt)).unsqueeze(0).to(device)
     generated = []
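This first hunk is the usual ZeroGPU fix: on Spaces ZeroGPU hardware, module-level code runs with no GPU attached, so calling load_model('gpt_model.pth') at import time (it ends in model.to(device)) cannot land on CUDA; inside a @spaces.GPU-decorated function a GPU is attached for the duration of the call, so the load moves there. Below is a minimal sketch of the same idea with a one-slot cache so the checkpoint is not re-read on every request. The get_model helper and the lru_cache wrapper are illustrative assumptions, not part of this commit, and whether the cache survives between requests depends on how the Space's GPU workers are recycled:

import functools

import spaces

@functools.lru_cache(maxsize=1)
def get_model():
    # load_model is the function defined earlier in app.py; this sketch
    # assumes it can be called repeatedly with the same path.
    return load_model('gpt_model.pth')

@spaces.GPU(duration=60)
async def generate_text(prompt, max_length=432, temperature=0.8, top_k=40):
    model = get_model()  # loaded on first call, reused while the worker is warm
    device = next(model.parameters()).device
    ...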
@@ -159,18 +161,11 @@ async def generate_text(prompt, max_length=432, temperature=0.8, top_k=40):
     if next_token.item() == enc.encode('\n')[0] and len(generated) > 100:
         break
 
-    await asyncio.sleep(0.02)
+    await asyncio.sleep(0.02)
 
     if len(generated) == max_length:
         yield "... (output truncated due to length)"
 
-# Update the gradio_generate function
-@spaces.GPU(duration=60)  # Adjust duration as needed
-async def gradio_generate(prompt, max_length, temperature, top_k):
-    output = ""
-    async for token in generate_text(prompt, max_length, temperature, top_k):
-        output += token
-        yield output
 
 # # Your existing imports and model code here...
 
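The second hunk leaves all streaming to the single decorated function: the await asyncio.sleep(0.02) paces the stream and yields control to the event loop so each token can be flushed to the UI, and dropping the gradio_generate wrapper also removes its second @spaces.GPU decorator (stacking one decorated function on another would negotiate the GPU twice per request). Recent Gradio versions accept an async generator directly as an event handler, but each yielded value replaces the output component's contents, so something still has to accumulate tokens. A hedged sketch of that wiring; the component layout and the stream_output helper are assumptions, not from app.py:

import gradio as gr

async def stream_output(prompt, max_length, temperature, top_k):
    # Undecorated accumulator: generate_text (decorated above) owns the
    # GPU request; this coroutine only concatenates tokens for display.
    text = ""
    async for token in generate_text(prompt, max_length, temperature, top_k):
        text += token
        yield text  # each yield replaces the Textbox contents

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    max_len = gr.Slider(16, 432, value=432, step=1, label="Max length")
    temp = gr.Slider(0.1, 2.0, value=0.8, label="Temperature")
    top_k = gr.Slider(1, 100, value=40, step=1, label="Top-k")
    out = gr.Textbox(label="Generated text")
    gr.Button("Generate").click(stream_output,
                                inputs=[prompt, max_len, temp, top_k],
                                outputs=out)

demo.launch()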