"""Gradio demo that colors the tokens of a text by their per-token loss."""

import os
import pdb
from types import SimpleNamespace

import gradio as gr
from transformers import AutoTokenizer

from get_loss.get_loss_hf import run_get_loss

# Optional lm-evaluation-harness setup (intentionally left disabled):
#   git clone https://github.com/EleutherAI/lm-evaluation-harness
#   cd lm-evaluation-harness
#   pip install -e .   # optional mirror: -i https://pypi.tuna.tsinghua.edu.cn/simple


# Feature 1: color the text according to the loss of each token.
def color_text(text_list=None, loss_list=None):
    """Pair every token text with a score derived from its loss.

    Args:
        text_list: Token texts. Defaults to ``["hi", "FreshEval", "!"]``.
        loss_list: Losses for the tokens after the first one. May be any
            object exposing ``.tolist()`` (e.g. a NumPy array) or a plain
            sequence of numbers. Defaults to ``[0.1, 0.7]``.

    Returns:
        A list of ``(text, score)`` tuples where ``score`` is ``loss / 20``.
        A loss of 0 is prepended for the first token, and pairing stops at
        the shorter of the two inputs.
    """
    if text_list is None:
        text_list = ["hi", "FreshEval", "!"]
    if loss_list is None:
        loss_list = [0.1, 0.7]

    # Array-like inputs are converted with .tolist(); plain sequences (which
    # have no .tolist()) are copied into a list instead of crashing.
    if hasattr(loss_list, "tolist"):
        losses = loss_list.tolist()
    else:
        losses = list(loss_list)
    # The first token gets a loss of 0 so both sequences line up.
    losses = [0] + losses

    # TODO rescale: dividing by 20 is a placeholder normalisation.
    highlighted_text = [
        (text, loss / 20) for text, loss in zip(text_list, losses)
    ]
    print('highlighted_text', highlighted_text)
    return highlighted_text


# Feature 2: turn a list of token IDs back into text with a tokenizer.
def get_text(ids_list=None, tokenizer=None):
    """Decode each ID in ``ids_list`` separately with ``tokenizer``.

    Args:
        ids_list: Iterable of token IDs. Defaults to ``[0.1, 0.7]``.
        tokenizer: Object providing ``decode(id, skip_special_tokens=True)``.
            It is required in practice: the default ``None`` raises
            ``AttributeError`` on the first decode.

    Returns:
        A list with one decoded string per entry of ``ids_list``.
    """
    if ids_list is None:
        ids_list = [0.1, 0.7]
    # Only plain text is returned here; styling is applied by the caller.
    text = [
        tokenizer.decode(token_id, skip_special_tokens=True)
        for token_id in ids_list
    ]
    print(f'L41:{text}')
    return text


def color_pipeline(texts=None, model=None):
    """Compute per-token losses for ``texts`` and return colored tokens.

    Args:
        texts: Input forwarded to ``run_get_loss`` as ``args.texts``.
            Defaults to ``["Hi", "FreshEval", "!"]``.
        model: Model identifier forwarded to ``run_get_loss`` as
            ``args.model``.

    Returns:
        The ``(text, score)`` list produced by ``color_text``.
    """
    if texts is None:
        texts = ["Hi", "FreshEval", "!"]
    print('text,model', texts, model)
    args = SimpleNamespace(texts=texts, model=model)
    print(f'L60,text:{texts}')
    rtn_dic = run_get_loss(args)

    # The keys 'input_ids', 'loss' and 'tokenizer' are read from the result.
    # NOTE(review): an older comment described the loss as a NumPy ndarray
    # -- confirm against run_get_loss.
    ids = rtn_dic['input_ids']
    loss = rtn_dic['loss']
    tokenizer = rtn_dic['tokenizer']

    text = get_text(ids, tokenizer)
    return color_text(text, loss)


def test_question(question, answer, other_choices):
    """Return the perplexities of the answer and of the other choices.

    Approach is still open: use extraction, or use perplexity (ppl).

    NOTE(review): ``get_ppl`` is not defined or imported in the visible part
    of this file, so calling this function raises ``NameError`` until it is
    provided. The function is not wired to any UI event yet.
    """
    answer_ppl, other_choices_ppl = get_ppl(question, answer, other_choices)
    return answer_ppl, other_choices_ppl


# TODO can this be global? Maybe a session is needed to store per-user info.
# Build the Gradio interface.
with gr.Blocks() as demo:
    with gr.Tab("color your text"):
        # NOTE(review): the original indentation was lost; it is assumed that
        # the two inputs share a row and the output sits below -- confirm.
        with gr.Row():
            text_input = gr.Textbox(
                label="input text",
                placeholder="input your text here...",
            )
            # TODO drag and drop the file
            # loss_input = gr.Number(label="loss")
            model_input = gr.Textbox(
                label="model name",
                placeholder="input your model name here... now I am trying phi-2...",
            )
        output_box = gr.HighlightedText(label="colored text")

        # Disabled example block, kept for reference:
        # gr.Examples(
        #     [
        #         # ["Hi FreshEval !", "microsoft/phi-2"],
        #         ["Hello FreshBench !", "/home/sribd/chenghao/models/phi-2"],
        #     ],
        #     [text_input, model_input],
        #     cache_examples=True,
        #     # cache_examples=False,
        #     fn=color_pipeline,
        #     outputs=output_box,
        # )
        # TODO select models that can be used online
        # TODO maybe add our own models

        color_text_output = gr.HTML(label="colored text")
        color_text_button = gr.Button("color the text")
        color_text_button.click(
            color_pipeline,
            inputs=[text_input, model_input],
            outputs=output_box,
        )

        # TODO add date time input
        date_time_input = gr.Textbox(label="the date when the text is generated")
        description_input = gr.Textbox(label="description of the text")
        # No handler exists yet, so no click event is registered (calling
        # .click() without a function does not attach anything useful).
        submit_button = gr.Button("submit a post or record")
        # TODO add model and its score

    with gr.Tab('test your qeustion'):
        # Use extraction, or use ppl.
        question = gr.Textbox(placeholder='input your question here...')
        answer = gr.Textbox(placeholder='input your answer here...')
        other_choices = gr.Textbox(placeholder='input your other choices here...')
        # TODO wire this button to test_question once get_ppl is available.
        test_button = gr.Button('test')
        # TODO add the model and its score

    with gr.Tab("model text ppl with time"):
        # See the matplotlib example, to see ppl with time; select the models.
        # TODO load the json file with time.
        pass

    with gr.Tab("model quesion acc with time"):
        # See the matplotlib example, to see ppl with time; select the models.
        # TODO add a "hot questions" tab: see the questions and answers.
        pass

    with gr.Tab("ppl"):
        # See the questions.
        pass

# Started at module level, as in the original script.
demo.launch(debug=True)