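# CAMAI: a Gradio demo that chats in Hindi with the ai4bharat/Airavata model,
# answering typed or spoken input with generated text and a gTTS audio reply.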
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
import io

device = "cuda" if torch.cuda.is_available() else "cpu"

def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
    formatted_text = ""
    for message in messages:
        if message["role"] == "system":
            formatted_text += "<|system|>\n" + message["content"] + "\n"
        elif message["role"] == "user":
            formatted_text += "<|user|>\n" + message["content"] + "\n"
        elif message["role"] == "assistant":
            formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
        else:
            raise ValueError(
                "Tulu chat template only supports 'system', 'user', and 'assistant' roles. Invalid role: {}.".format(
                    message["role"]
                )
            )
    formatted_text += "<|assistant|>\n"
    formatted_text = bos + formatted_text if add_bos else formatted_text
    return formatted_text
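
# e.g. create_prompt_with_chat_format([{"role": "user", "content": "नमस्ते"}], add_bos=False)
# returns "<|user|>\nनमस्ते\n<|assistant|>\n"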

def inference(input_prompts, model, tokenizer):
    input_prompts = [
        create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
        for input_prompt in input_prompts
    ]
    encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
    encodings = encodings.to(device)

    with torch.no_grad():
        outputs = model.generate(
            encodings.input_ids,
            attention_mask=encodings.attention_mask,
            do_sample=False,
            max_new_tokens=250,
        )

    output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)
    input_prompts = [
        tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
    ]
    output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
    return output_texts
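
# Load the Airavata model and tokenizer (left padding so batched generation lines up),
# following the reference snippets kept further down in this file.
model_name = "ai4bharat/Airavata"
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)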

def recognize_speech():
    # Capture Hindi speech from the local microphone and transcribe it with Google Speech Recognition.
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    with microphone as source:
        print("Listening...")
        recognizer.adjust_for_ambient_noise(source)
        audio_data = recognizer.listen(source, timeout=5)

    try:
        print("Recognizing...")
        text = recognizer.recognize_google(audio_data, language="hi-IN")
        return text
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio.")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return ""

def text_to_speech(text):
    tts = gTTS(text=text, lang="hi")
    audio_stream = io.BytesIO()
    # gTTS.save expects a filename; write_to_fp writes the MP3 bytes to an in-memory buffer.
    tts.write_to_fp(audio_stream)
    audio_stream.seek(0)
    audio = AudioSegment.from_file(audio_stream, format="mp3")
    return audio

def respond_to_input(input_text, audio_path=None):
    # If nothing was typed, transcribe the recorded microphone audio instead.
    if not input_text and audio_path:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            input_text = recognizer.recognize_google(recognizer.record(source), language="hi-IN")
    output_text = inference([input_text], model, tokenizer)[0]
    output_path = "response.wav"  # export to WAV so the Gradio audio output can play it
    text_to_speech(output_text).export(output_path, format="wav")
    return output_text, output_path

examples = [
    ["मुझे अपने करियर के बारे में सुझाव दो", None],
    ["मैं कैसे अध्ययन कर सकता हूँ?", None],
    ["कृपया मुझे एक कहानी सुनाएं", None],
    ["ताजमहल के बारे में कुछ बताएं", None],
    ["मेरा नाम क्या है?", None],
    ["आपका पसंदीदा फिल्म कौन सी है?", None],
]

iface = gr.Interface(
    fn=respond_to_input,
    # Gradio 3.x-style components: type="filepath" hands respond_to_input a recorded WAV path.
    inputs=[
        gr.Textbox(label="Text"),
        gr.Audio(source="microphone", type="filepath", label="Microphone"),
    ],
    outputs=["text", "audio"],
    live=True,
    examples=examples,
    title="CAMAI",
    description="Type or speak to me, and I'll generate a response!",
    theme="light",
)

iface.launch()

###############################################################################################################################
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import gradio as gr

# device = "cuda" if torch.cuda.is_available() else "cpu"

# def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
#     formatted_text = ""
#     for message in messages:
#         if message["role"] == "system":
#             formatted_text += "<|system|>\n" + message["content"] + "\n"
#         elif message["role"] == "user":
#             formatted_text += "<|user|>\n" + message["content"] + "\n"
#         elif message["role"] == "assistant":
#             formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
#         else:
#             raise ValueError(
#                 "Tulu chat template only supports 'system', 'user' and 'assistant' roles. Invalid role: {}.".format(
#                     message["role"]
#                 )
#             )
#     formatted_text += "<|assistant|>\n"
#     formatted_text = bos + formatted_text if add_bos else formatted_text
#     return formatted_text

# def inference(input_prompts, model, tokenizer):
#     input_prompts = [
#         create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
#         for input_prompt in input_prompts
#     ]
#     encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
#     encodings = encodings.to(device)
#     with torch.inference_mode():
#         outputs = model.generate(encodings.input_ids, do_sample=False, max_new_tokens=250)
#     output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)
#     input_prompts = [
#         tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
#     ]
#     output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
#     return output_texts

# model_name = "ai4bharat/Airavata"
# tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
# tokenizer.pad_token = tokenizer.eos_token
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)

# def respond_to_text(input_text):
#     outputs = inference([input_text], model, tokenizer)
#     return outputs[0]

# input_prompts = [
#     "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं।",
#     "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं और उनका वर्णन करें।",
# ]

# iface = gr.Interface(fn=respond_to_text, inputs="text", outputs="text")
# iface.launch()
########################################################################################
# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained("ai4bharat/Airavata")
# model = AutoModelForCausalLM.from_pretrained("ai4bharat/Airavata")

# def generate_response(prompt):
#     input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=50)
#     output_ids = model.generate(input_ids, max_length=100, num_beams=5, no_repeat_ngram_size=2)
#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#     return response

# iface = gr.Interface(
#     fn=generate_response,
#     inputs="text",
#     outputs="text",
#     live=True,
#     title="Airavata LLMs Chatbot",
#     description="Ask me anything, and I'll generate a response!",
#     theme="light",
# )
# iface.launch()
# import gradio as gr
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM

# device = "cuda" if torch.cuda.is_available() else "cpu"

# def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
#     formatted_text = ""
#     for message in messages:
#         if message["role"] == "system":
#             formatted_text += "<|system|>\n" + message["content"] + "\n"
#         elif message["role"] == "user":
#             formatted_text += "<|user|>\n" + message["content"] + "\n"
#         elif message["role"] == "assistant":
#             formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
#         else:
#             raise ValueError(
#                 "Tulu chat template only supports 'system', 'user', and 'assistant' roles. Invalid role: {}.".format(
#                     message["role"]
#                 )
#             )
#     formatted_text += "<|assistant|>\n"
#     formatted_text = bos + formatted_text if add_bos else formatted_text
#     return formatted_text

# def inference(input_prompts, model, tokenizer):
#     input_prompts = [
#         create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
#         for input_prompt in input_prompts
#     ]
#     encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
#     encodings = encodings.to(device)
#     with torch.no_grad():
#         outputs = model.generate(encodings.input_ids, do_sample=False, max_length=250)
#     output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)
#     input_prompts = [
#         tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
#     ]
#     output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
#     return output_texts

# model_name = "ai4bharat/Airavata"
# tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
# tokenizer.pad_token = tokenizer.eos_token
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)

# examples = [
#     ["मुझे अपने करियर के बारे में सुझाव दो", "मैं कैसे अध्ययन कर सकता हूँ?"],
#     ["कृपया मुझे एक कहानी सुनाएं", "ताजमहल के बारे में कुछ बताएं"],
#     ["मेरा नाम क्या है?", "आपका पसंदीदा फिल्म कौन सी है?"],
# ]

# iface = gr.Chat(
#     model_fn=lambda input_prompts: inference(input_prompts, model, tokenizer),
#     inputs=["text"],
#     outputs="text",
#     examples=examples,
#     title="Airavata Chatbot",
#     theme="light",  # Optional: Set a light theme
# )
# iface.launch()