"""import gradio as gr
import torch
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import pytz
from datetime import datetime
print("Loading model and tokenizer...")
model_name = "large-traversaal/Phi-4-Hindi"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
print("Model and tokenizer loaded successfully!")
option_mapping = {"translation": "### TRANSLATION ###", "mcq": "### MCQ ###", "nli": "### NLI ###", "summarization": "### SUMMARIZATION ###", "Boolean": "### BOOLEAN ###",
    "long response": "### LONG RESPONSE ###", "direct response": "### DIRECT RESPONSE ###", "paraphrase": "### PARAPHRASE ###", "code": "### CODE ###"}
def generate_response(message, temperature, max_new_tokens, top_p, task):
    append_text = option_mapping.get(task, "")
    prompt = f"INPUT : {message} {append_text} RESPONSE : "
    print(f"Prompt: {prompt}")
    start_time = time.time()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    gen_kwargs = {"input_ids": inputs["input_ids"], "streamer": streamer, "temperature": temperature, "max_new_tokens": max_new_tokens, "top_p": top_p, "do_sample": True if temperature > 0 else False,}
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()
    result = []
    for text in streamer:
        result.append(text)
        yield "".join(result)
    end_time = time.time()
    time_taken = end_time - start_time
    output_text = "".join(result)
    if "RESPONSE : " in output_text:
        output_text = output_text.split("RESPONSE : ", 1)[1].strip()
    print(f"Output: {output_text}")
    print(f"Time taken: {time_taken:.2f} seconds")
    pst_timezone = pytz.timezone('America/Los_Angeles')
    current_time_pst = datetime.now(pst_timezone).strftime("%Y-%m-%d %H:%M:%S %Z%z")
    print(f"Current timestamp (PST): {current_time_pst}")
with gr.Blocks() as demo:
    gr.Markdown("# Phi-4-Hindi Demo")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Input", placeholder="Enter your text here...", lines=5)
            task_dropdown = gr.Dropdown(choices=["boolean", "translation", "mcq", "nli", "summarization", "long response", "direct response", "paraphrase", "code"], value="long response", label="Task")
            with gr.Row():
                with gr.Column():
                    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Temperature")
                with gr.Column():
                    max_new_tokens = gr.Slider(minimum=50, maximum=1000, value=400, step=10, label="Max New Tokens")
                with gr.Column():
                    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Top P")
            with gr.Row():
                clear_btn = gr.Button("Clear")
                send_btn = gr.Button("Send", variant="primary")
        with gr.Column():
            output_text = gr.Textbox(label="Output", lines=15)
    send_btn.click(fn=generate_response, inputs=[input_text, temperature, max_new_tokens, top_p, task_dropdown], outputs=output_text)
    clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[input_text, output_text])
if __name__ == "__main__":
    demo.queue().launch()
"""
import gradio as gr
import torch
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import pytz
from datetime import datetime
# The tokenizer and weights are loaded once at import time and shared by every
# request handled by generate_response.
print("Loading model and tokenizer...")
model_name = "large-traversaal/Phi-4-Hindi"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # load weights in bfloat16
    device_map="auto",  # leave device placement to the library
)
print("Model and tokenizer loaded successfully!")
# Maps a task name (as offered in the task dropdown) to the marker that is
# inserted into the prompt between the user's input and "RESPONSE :".
option_mapping = {
    "translation": "### TRANSLATION ###",
    "mcq": "### MCQ ###",
    "nli": "### NLI ###",
    "summarization": "### SUMMARIZATION ###",
    "long response": "### LONG RESPONSE ###",
    "direct response": "### DIRECT RESPONSE ###",
    "paraphrase": "### PARAPHRASE ###",
    "code": "### CODE ###",
}
def generate_response(message, temperature, max_new_tokens, top_p, task):
    """Generate the model's reply to ``message`` for the selected task.

    The prompt has the form ``INPUT : <message> <task marker> RESPONSE : ``,
    where the marker is looked up in ``option_mapping`` (empty string when the
    task is not a known key). The prompt, the reply, the elapsed time and a
    Los Angeles timestamp are printed to stdout for logging.

    Args:
        message: Text entered by the user.
        temperature: Sampling temperature. A value of ``0`` (or below) selects
            greedy decoding; any positive value enables sampling.
        max_new_tokens: Upper bound on the number of generated tokens. Coerced
            to ``int`` because UI sliders may deliver floats.
        top_p: Value of the "Top P" control. It is forwarded to ``generate``
            as ``min_p`` (see the review note below).
        task: Key into ``option_mapping`` selecting the task marker.

    Returns:
        The newly generated text with surrounding whitespace stripped.
    """
    append_text = option_mapping.get(task, "")
    prompt = f"INPUT : {message} {append_text} RESPONSE : "
    print(f"Prompt: {prompt}")
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()

    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, which generate() needs to be unambiguous when pad == eos.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_ids = encoded["input_ids"]

    # Sampling parameters only take effect when do_sample is True, and a
    # temperature of 0 is invalid for sampling, so fall back to greedy there.
    do_sample = temperature > 0
    gen_kwargs = {
        "input_ids": prompt_ids,
        "attention_mask": encoded.get("attention_mask"),
        "max_new_tokens": int(max_new_tokens),
        "use_cache": True,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": do_sample,
    }
    if do_sample:
        gen_kwargs["temperature"] = temperature
        # NOTE(review): the UI labels this value "Top P" but it has always been
        # passed as min_p here; kept as-is -- confirm which one is intended.
        gen_kwargs["min_p"] = top_p
    outputs = model.generate(**gen_kwargs)

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on "RESPONSE :" would truncate replies that happen to
    # contain that marker themselves.
    new_tokens = outputs[0][prompt_ids.shape[-1]:]
    processed_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    time_taken = time.perf_counter() - start_time
    print(f"Output: {processed_response}")
    print(f"Time taken: {time_taken:.2f} seconds")
    pst_timezone = pytz.timezone('America/Los_Angeles')
    current_time_pst = datetime.now(pst_timezone).strftime("%Y-%m-%d %H:%M:%S %Z%z")
    print(f"Current timestamp (PST): {current_time_pst}")
    return processed_response
# UI definition: the left column holds the prompt, task selector, generation
# sliders and buttons; the right column shows the model output.
with gr.Blocks(theme='1024m/1024m-1') as demo:
    gr.Markdown("# Phi-4-Hindi Demo")
    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column():
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter your text here...",
                lines=5,
            )
            task_dropdown = gr.Dropdown(
                choices=[
                    "translation",
                    "mcq",
                    "nli",
                    "summarization",
                    "long response",
                    "direct response",
                    "paraphrase",
                    "code",
                ],
                value="long response",
                label="Task",
            )
            # Generation settings, one slider per sub-column.
            with gr.Row():
                with gr.Column():
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.1,
                        step=0.01,
                        label="Temperature",
                    )
                with gr.Column():
                    max_new_tokens = gr.Slider(
                        minimum=10,
                        maximum=1000,
                        value=10,
                        step=10,
                        label="Max New Tokens",
                    )
                with gr.Column():
                    top_p = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.1,
                        step=0.01,
                        label="Top P",
                    )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                send_btn = gr.Button("Send", variant="primary")
        # Right column: generated text.
        with gr.Column():
            output_text = gr.Textbox(label="Output", lines=15)

    # "Send" runs generation; the order of `inputs` matches the positional
    # parameters of generate_response.
    send_btn.click(
        fn=generate_response,
        inputs=[input_text, temperature, max_new_tokens, top_p, task_dropdown],
        outputs=output_text,
    )
    # "Clear" blanks both the input and the output textboxes.
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[input_text, output_text],
    )
if __name__ == "__main__":
    # Enable the request queue, then start the web server. This only runs
    # when the file is executed as a script, not when it is imported.
    queued_demo = demo.queue()
    queued_demo.launch()