import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datetime import datetime

print('{}:loading...'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

# LINE's 1.7B Japanese instruction-tuned model; swap in the 3.6B variant
# (commented out below) for higher quality at the cost of memory and speed.
tokenizer = AutoTokenizer.from_pretrained('line-corporation/japanese-large-lm-1.7b-instruction-sft', use_fast=False)
model = AutoModelForCausalLM.from_pretrained('line-corporation/japanese-large-lm-1.7b-instruction-sft')
# tokenizer = AutoTokenizer.from_pretrained('line-corporation/japanese-large-lm-3.6b-instruction-sft', use_fast=False)
# model = AutoModelForCausalLM.from_pretrained('line-corporation/japanese-large-lm-3.6b-instruction-sft')

# Use fp16 on GPU when one is available; otherwise stay on CPU in fp32.
if torch.cuda.is_available():
    model.half()
    model = model.to('cuda')

generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=model.device)
print('{}:done.'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))


def generate(input_text, maxlen):
    # Prompt format expected by the instruction-tuned model:
    # "ユーザー: <question>\nシステム: " (User: ... / System: ).
    prompt = f'ユーザー: {input_text}\nシステム: '
    output = generator(
        prompt,
        max_length=int(maxlen),  # the Gradio slider delivers a float
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        top_k=0,
        repetition_penalty=1.1,
        num_beams=1,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # The pipeline echoes the prompt at the start of the output, so slice it off
    # (plus one extra character, matching this tokenizer's decoded output).
    generated_text = output[0]['generated_text'][len(prompt) + 1:]
    return generated_text


with gr.Blocks(title='question answering ja') as app:
    gr.Markdown('# Question Answering JA')
    chatbot = gr.Chatbot(label='answer')
    msg = gr.Textbox(label='question')
    maxlen = gr.Slider(minimum=30, maximum=100, value=30, step=1, label='max length')
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, maxlen, chat_history):
        # Ignore empty submissions; otherwise generate a reply and append
        # the (question, answer) pair to the chat history.
        if message == '':
            return '', chat_history
        bot_message = generate(message, maxlen)
        chat_history.append((message, bot_message))
        return '', chat_history

    msg.submit(respond, [msg, maxlen, chatbot], [msg, chatbot], concurrency_limit=20)

app.launch()
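
# Usage (a minimal sketch, assuming this script is saved as app.py;
# the package list is an assumption based on the imports above):
#   pip install gradio torch transformers sentencepiece
#   python app.py
# launch() serves the UI locally; pass share=True for a temporary public URL.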