MohamedAAK commited on
Commit
d66ba5d
·
verified ·
1 Parent(s): 400558a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import os
4
+
5
+ import torch
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
+
8
# Hugging Face model id used for both tokenizer and model.
_MODEL_ID = "togethercomputer/RedPajama-INCITE-Chat-3B-v1"

# Lazy cache so the (expensive) tokenizer/model load happens once, not on
# every button click.
_loaded = {}


def _load_model():
    """Load and memoize the tokenizer/model pair for `_MODEL_ID`."""
    if "pair" not in _loaded:
        tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
        # NOTE: the original code never passed these args to from_pretrained
        # (the call and its argument tuple were split into two statements);
        # this is the intended single call.
        model = AutoModelForCausalLM.from_pretrained(
            _MODEL_ID,
            torch_dtype=torch.bfloat16,
        )
        _loaded["pair"] = (tokenizer, model)
    return _loaded["pair"]


def ask(text):
    """Generate a chat-style answer to `text` with the RedPajama-3B model.

    Parameters
    ----------
    text : str
        The user's question, inserted into the model's <human>/<bot>
        prompt template.

    Returns
    -------
    str
        The decoded model continuation (up to 48 new tokens), excluding
        the prompt itself.
    """
    tokenizer, model = _load_model()

    # Prompt template expected by the -Chat fine-tune.
    prompt = f"<human>: {text}\n<bot>:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Remember prompt length so only newly generated tokens are returned.
    input_length = inputs.input_ids.shape[1]
    # do_sample=True is required for `temperature` to have any effect;
    # without it generate() runs greedy decoding and ignores temperature.
    outputs = model.generate(
        **inputs,
        max_new_tokens=48,
        temperature=0.7,
        do_sample=True,
        return_dict_in_generate=True,
    )

    tokens = outputs.sequences[0, input_length:]
    return tokenizer.decode(tokens)
24
+
25
# Minimal Gradio UI: one tab with a question box, an Ask button, and a
# read-only answer box wired to ask().
with gr.Blocks() as server:
    with gr.Tab("LLM Inferencing"):
        model_input = gr.Textbox(label="Your Question:",
                                 value="What’s your question?", interactive=True)
        ask_button = gr.Button("Ask")
        model_output = gr.Textbox(label="The Answer:", interactive=False,
                                  value="Answer goes here...")

        # Clicking the button runs ask() on the question and writes the
        # result into the answer box.
        ask_button.click(ask, inputs=[model_input], outputs=[model_output])

# Guard the launch so importing this module (e.g. for tests) does not
# start the web server; running `python app.py` still launches it.
if __name__ == "__main__":
    server.launch()