File size: 2,897 Bytes
9d0e777 064d0ae 9d0e777 8ac7c1e 064d0ae cb536a9 9d0e777 1cf8caa 9d0e777 38ab966 9d0e777 d54999f 38ab966 9d0e777 cb536a9 56c2f3a cb536a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import torch
import torch.nn as nn
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
class _MLPVectorProjector(nn.Module):
    """Bank of parallel, bias-free MLPs whose outputs are concatenated.

    ``width`` independent MLPs are built. Each one starts with a linear map
    from ``input_hidden_size`` to ``lm_hidden_size`` and is followed by
    ``num_layers - 1`` further (GELU, Linear) pairs that stay at
    ``lm_hidden_size``. ``forward`` feeds the same input to every MLP and
    joins the results along the second-to-last dimension.

    NOTE(review): presumably this projects an encoder embedding into
    ``width`` language-model-sized vectors -- confirm against the caller.
    """

    def __init__(
        self, input_hidden_size: int, lm_hidden_size: int, num_layers: int, width: int
    ):
        super().__init__()
        # Branches are created one after another so layer construction order
        # (and therefore the state-dict layout ``mlps.<i>.<j>``) is unchanged.
        self.mlps = nn.ModuleList(
            [
                self._build_branch(input_hidden_size, lm_hidden_size, num_layers)
                for _ in range(width)
            ]
        )

    @staticmethod
    def _build_branch(in_features: int, out_features: int, depth: int) -> nn.Sequential:
        """Build one MLP: Linear, then ``depth - 1`` (GELU, Linear) pairs."""
        layers = [nn.Linear(in_features, out_features, bias=False)]
        for _ in range(depth - 1):
            layers += [
                nn.GELU(),
                nn.Linear(out_features, out_features, bias=False),
            ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate along dim ``-2``."""
        branch_outputs = [branch(x) for branch in self.mlps]
        return torch.cat(branch_outputs, dim=-2)
# Checkpoint identifier shared by the tokenizer and model loads below.
model_name = "microsoft/phi-2"

# Both objects are loaded once at import time and used by textMode.
# trust_remote_code=True is passed to both loaders; per the transformers
# documentation this permits running custom code shipped with the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
phi2_text = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)
def textMode(text, count):
    """Generate a continuation of ``text`` with the module-level phi-2 model.

    Args:
        text: Prompt string to continue.
        count: Maximum number of new tokens to generate; anything ``int()``
            accepts (the UI supplies it as a string).

    Returns:
        The decoded first sequence (prompt plus continuation) with a trailing
        ``<|endoftext|>`` marker and trailing newlines removed.

    Raises:
        ValueError: If ``count`` cannot be converted to an integer.
    """
    count = int(count)
    inputs = tokenizer(text, return_tensors="pt", return_attention_mask=False)
    generated_ids = phi2_text.generate(
        **inputs,
        max_new_tokens=count,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    prediction = tokenizer.batch_decode(generated_ids)[0]

    # Remove the end-of-text marker as a whole suffix. str.rstrip() with this
    # string would treat it as a set of characters and also eat legitimate
    # trailing letters such as "d", "t", "n" or "e" from the generated text.
    eos_marker = "<|endoftext|>"
    while prediction.endswith(eos_marker):
        prediction = prediction[: -len(eos_marker)]

    return prediction.rstrip("\n")
def imageMode(image, question):
    """Placeholder handler for the image tab.

    Both ``image`` and ``question`` are currently ignored; a fixed status
    string is returned.
    """
    status = "In progress"
    return status
def audioMode(audio):
    """Placeholder handler for the audio tab.

    ``audio`` is currently ignored; a fixed status string is returned.
    """
    status = "In progress"
    return status
interface_title = "TSAI-ERA-V1 - Capstone - Multimodal GPT Demo"

# Three-tab UI: one tab per input modality, each with its own submit button
# wired to the matching handler (textMode / imageMode / audioMode).
# NOTE(review): the nesting of components inside each Row/Tab was
# reconstructed from a listing without indentation -- confirm the layout.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(f"## **{interface_title}**")
    gr.Markdown("Choose text mode/image mode/audio mode for generation")

    with gr.Tab("Text mode"):
        text_input = gr.Textbox(placeholder="Enter a prompt", label="Input")
        # This value reaches generate() as max_new_tokens via textMode, so the
        # hint asks for a token count rather than a character count.
        text_input_count = gr.Textbox(
            placeholder="Enter number of tokens you want to generate",
            label="Count",
        )
        text_button = gr.Button("Submit")
        text_output = gr.Textbox(label="Chat GPT like text")

    with gr.Tab("Image mode"):
        with gr.Row():
            image_input = gr.Image()
            image_text_input = gr.Textbox(
                placeholder="Enter a question/prompt around the image",
                label="Question/Prompt",
            )
        image_button = gr.Button("Submit")
        image_text_output = gr.Textbox(label="Answer")

    with gr.Tab("Audio mode"):
        audio_input = gr.Audio()
        audio_button = gr.Button("Submit")
        audio_text_output = gr.Textbox(label="Chat GPT like text")

    # Event wiring is done while the Blocks context is still open.
    text_button.click(
        textMode,
        inputs=[text_input, text_input_count],
        outputs=text_output,
    )
    image_button.click(
        imageMode,
        inputs=[image_input, image_text_input],
        outputs=image_text_output,
    )
    audio_button.click(audioMode, inputs=audio_input, outputs=audio_text_output)

# Start the web server for the app.
demo.launch()