import gradio as gr
import torch
from transformers import pipeline, AutoConfig, AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

# config = PeftConfig.from_pretrained("hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es")
# model = AutoModelForCausalLM.from_pretrained("hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es", return_dict=True, load_in_8bit=True, device_map="auto")
# # load tokenizer
# tokenizer = AutoTokenizer.from_pretrained("hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es")
# # load the LoRA adapter on top of the base model
# model = PeftModel.from_pretrained(model, "hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es")

# load the fp16 model in 8-bit
model = AutoModelForCausalLM.from_pretrained(
    "bertin-project/bertin-gpt-j-6B",
    revision="half",
    load_in_8bit=True,
    device_map="auto",
)
config = AutoConfig.from_pretrained("bertin-project/bertin-gpt-j-6B")
tokenizer = AutoTokenizer.from_pretrained("bertin-project/bertin-gpt-j-6B")

# create the text-generation pipeline; the model is already dispatched across
# devices by device_map="auto", so no explicit device argument is passed
pipe = pipeline("text-generation", model=model, config=config, tokenizer=tokenizer)


def predict(text):
    # the pipeline returns a list with one dict per generated sequence
    return pipe(f"text: {text}, entities:")[0]["generated_text"]


iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    examples=[["Yo hoy voy a hablar de mujeres en el mundo del arte, porque me ha leído un libro fantástico que se llama Historia del arte sin hombres, de Katie Hesel."]],
)

iface.launch()
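
# Note: the raw generation echoes the prompt, so the interface currently shows
# the "text: ..., entities:" prefix along with the model's continuation. A
# minimal post-processing sketch is below, assuming the fine-tuned checkpoint
# emits its predictions after the "entities:" marker; the exact output format
# depends on the checkpoint, so treat the split logic as an assumption.
#
# def extract_entities(text):
#     generated = pipe(f"text: {text}, entities:")[0]["generated_text"]
#     # keep only what follows the last "entities:" marker (assumed format)
#     return generated.split("entities:")[-1].strip()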