import gradio as gr
import random

import requests

# Template: static text and styling values; title, article, theme and examples
# are passed to gr.Interface further down in this file.
title = "A conversation with Gandalf (GPTJ-6B) 🧙"
description = ""
# HTML snippet passed as the interface's `article`. The <img> attributes are
# separated by whitespace only: a comma between them is malformed markup.
article = """
<p> To reset you <b>need to reload the page.</b> </p>
<p> If you liked don't forget to 💖 the project 🥰 </p>
<h2> Parameters: </h2>
<ul>
    <li><i>top_p</i>:  control how deterministic the model is in generating a response.</li>
    <li><i>temperature</i>: (sampling temperature) higher values means the model will take more risks.</li>
    <li><i>max_new_tokens</i>: Max number of tokens in generation.</li>
</ul>
<img src='http://www.simoninithomas.com/test/gandalf.jpg' alt="Gandalf"/>"""
theme = "huggingface"
# One row per example, in the same order as the interface inputs:
# top_p, temperature, max_new_tokens, message.
examples = [
    [0.9, 1.1, 50, "Hey Gandalf! How are you?"],
    [0.9, 1.1, 50, "Hey Gandalf, why you didn't use the great eagles to fly Frodo to Mordor?"],
]

# GPT-J-6B API
API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-j-6B"


def query(payload, timeout=120):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait on the server before giving up. Without an
            explicit timeout ``requests`` waits indefinitely, so a stalled
            endpoint would block the caller forever.

    Returns:
        The decoded JSON body. The HTTP status code is not inspected here, so
        the caller has to check the shape of what comes back.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(API_URL, json=payload, timeout=timeout)
    return response.json()
# Persona preamble; chat() hands `context` to build_prompt(), which puts it at
# the very top of the prompt.
context_setup = (
    "The following is a conversation with Gandalf, "
    "the mage of 'the Lord of the Rings'"
)
context = context_setup
# Speaker labels used in the prompt: index 0 labels the user's lines,
# index 1 labels the model's lines.
interlocutor_names = ["Human", "Gandalf"]

# Builds the prompt from what previously happened
def build_prompt(conversation, context, names=None):
    """Render the context and the dialogue so far as a single prompt string.

    Args:
        conversation: Iterable of ``(user_msg, resp_msg)`` pairs, oldest first.
        context: Text placed on the first line of the prompt.
        names: Optional sequence whose first two items are the user label and
            the model label. Defaults to the module-level
            ``interlocutor_names``.

    Returns:
        ``context``, a blank line, then one ``- <label>:<text>`` line per
        message. A final pair with an empty ``resp_msg`` leaves the prompt
        ending in ``- <model label>:``.
    """
    labels = interlocutor_names if names is None else names
    user_label, model_label = labels[0], labels[1]
    # A single join replaces repeated string += inside the loop.
    turns = "".join(
        f"\n- {user_label}:{user_msg}\n- {model_label}:{resp_msg}"
        for user_msg, resp_msg in conversation
    )
    return f"{context}\n{turns}"

# Attempt to recognize what the model said, if it used the correct format
def clean_chat_output(txt, prompt, names=None):
    """Extract the model's own reply from raw generated text.

    Any copy of ``prompt`` is removed from ``txt``, then everything from the
    first ``"\\n- <user label>"`` onwards is discarded, so a continuation in
    which the model starts writing the user's next line is cut off.

    Args:
        txt: Text returned by the model.
        prompt: Prompt that was sent; removed from ``txt`` if present.
        names: Optional sequence whose first item is the user label. Defaults
            to the module-level ``interlocutor_names``.

    Returns:
        The reply text. When the delimiter does not occur, the whole remaining
        text is returned unchanged.
    """
    labels = interlocutor_names if names is None else names
    delimiter = "\n- " + labels[0]
    output = txt.replace(prompt, "")
    # partition() yields the full string when the delimiter is missing.
    # Slicing with str.find() would use -1 there and silently drop the last
    # character of the reply.
    reply, _, _ = output.partition(delimiter)
    return reply


def _generated_text_or_error(result):
    """Split an API reply into ``(generated_text, error_message)``.

    Exactly one of the two items is ``None``. Anything that is not a non-empty
    sequence whose first item has a ``"generated_text"`` key counts as an
    error. Presumably the API reports failures as a JSON object with an
    ``"error"`` key -- TODO confirm against the API documentation.
    """
    try:
        return result[0]["generated_text"], None
    except (KeyError, IndexError, TypeError):
        detail = result.get("error") if isinstance(result, dict) else None
        return None, str(detail or result)


def chat(top_p, temperature, max_new_tokens, message):
    """Send the user's message to the model and render the dialogue as HTML.

    The stored history is only updated after a reply was obtained. A failed
    request therefore does not leave an empty turn in the state; the failure
    is shown in place of the reply for this render only.

    Args:
        top_p: Forwarded to the API as the ``top_p`` parameter.
        temperature: Forwarded to the API as the ``temperature`` parameter.
        max_new_tokens: Forwarded to the API as ``max_new_tokens``.
        message: The user's new message.

    Returns:
        An HTML string with one ``user_msg`` and one ``resp_msg`` ``<div>``
        per turn, wrapped in a ``chatbot`` ``<div>``.
    """
    # Local import: only this function needs it.
    from html import escape

    history = gr.get_state() or []
    # The pending turn has an empty reply so that the prompt ends on the
    # model's speaker label and the model continues from there.
    pending = list(history) + [(message, "")]
    prompt = build_prompt(pending, context)

    # Build JSON
    payload = {
        "inputs": prompt,
        "parameters": {
            "top_p": top_p,
            "temperature": temperature,
            "max_new_tokens": max_new_tokens,
            "return_full_text": False,
        },
    }

    try:
        result = query(payload)
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a response body that is not valid JSON.
        generated, failure = None, f"Request to the model failed: {exc}"
    else:
        generated, failure = _generated_text_or_error(result)

    if failure is None:
        reply = clean_chat_output(generated, prompt)
        shown = list(history) + [(message, reply)]
        gr.set_state(shown)
    else:
        # Display the problem, but do not persist the failed turn.
        shown = list(history) + [(message, f"Error: {failure}")]

    # Both sides are untrusted text (user input and model output), so they
    # are escaped before being placed into markup.
    rows = "".join(
        f"<div class='user_msg'>{escape(str(user_msg))}</div>"
        f"<div class='resp_msg'>{escape(str(resp_msg))}</div>"
        for user_msg, resp_msg in shown
    )
    return f"<div class='chatbot'>{rows}</div>"

# Inputs are listed in the order of chat()'s parameters:
# top_p, temperature, max_new_tokens, message.
iface = gr.Interface(
    chat,
    [
        gr.inputs.Slider(minimum=0.5, maximum=1, step=0.05, default=0.9, label="top_p"),
        gr.inputs.Slider(minimum=0.5, maximum=1.5, step=0.1, default=1.1, label="temperature"),
        gr.inputs.Slider(minimum=20, maximum=250, step=10, default=50, label="max_new_tokens"),
        "text",
    ],
    "html",
    # The container selector has to match the class chat() puts on its wrapper
    # <div> ("chatbot"). Without the flex container the align-self rules on
    # the message bubbles have no effect.
    css="""
    .chatbot {display:flex;flex-direction:column}
    .user_msg, .resp_msg {padding:4px;margin-bottom:4px;border-radius:4px;width:80%}
    .user_msg {background-color:cornflowerblue;color:white;align-self:start}
    .resp_msg {background-color:lightgray;align-self:self-end}
""",
    allow_screenshot=True,
    allow_flagging=True,
    title=title,
    article=article,
    theme=theme,
    examples=examples,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()