Spaces:

bespin-global
/

Bespin-QuestionAnswering

Runtime error

App Files Files Community

Bespin-QuestionAnswering / app.py

Copycats

Update app.py

d01bebb over 2 years ago

raw

history blame

7.74 kB

	import streamlit as st
	import torch
	from transformers import AutoModelForQuestionAnswering, AutoTokenizer


	@st.cache(allow_output_mutation=True)
	def get_model():
	    """Load the fine-tuned MRC tokenizer and model from the Hugging Face Hub.

	    The function is wrapped in ``st.cache`` so Streamlit can reuse the
	    loaded objects instead of loading them again.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    hub_model_id = "bespin-global/klue-bert-base-aihub-mrc"
	    return (
	        AutoTokenizer.from_pretrained(hub_model_id),
	        AutoModelForQuestionAnswering.from_pretrained(hub_model_id),
	    )

	# Module-level tokenizer and model; the prediction code below reads these globals.
	tokenizer, model = get_model()


	def predict_answer(qa_text_pair):
	    """Predict the answer span for a single question/context pair.

	    Uses the module-level ``tokenizer`` and ``model``.

	    Args:
	        qa_text_pair: Mapping with ``'question'`` and ``'context'`` entries.

	    Returns:
	        dict with two entries:
	            ``'answer_text'``: the decoded tokens between the predicted start
	                and end token (inclusive). The slice is empty when the
	                predicted end index precedes the start index.
	            ``'answer_offset'``: ``(start, end)`` taken from the tokenizer's
	                offset mapping for the predicted start and end tokens.
	    """
	    # No global ``device`` exists in this file; run on the device that
	    # already holds the model's weights so inputs and model always match.
	    device = next(model.parameters()).device

	    # Encoding
	    encodings = tokenizer(
	        qa_text_pair['question'], qa_text_pair['context'],
	        max_length=512,
	        truncation=True,
	        padding="max_length",
	        return_token_type_ids=False,
	        return_offsets_mapping=True,
	    )
	    # Wrap each field in a list so every tensor gets a leading batch axis of 1.
	    encodings = {key: torch.tensor([val]).to(device) for key, val in encodings.items()}

	    # Predict (inference only, so no gradient tracking).
	    with torch.no_grad():
	        pred = model(encodings['input_ids'], encodings['attention_mask'])

	    # argmax over the sequence axis yields one-element tensors; use plain ints.
	    token_start_index = int(pred.start_logits.argmax(dim=-1))
	    token_end_index = int(pred.end_logits.argmax(dim=-1))
	    pred_ids = encodings['input_ids'][0][token_start_index: token_end_index + 1]

	    # Start offset of the first predicted token, end offset of the last one.
	    offsets = encodings['offset_mapping'][0]
	    answer_start_offset = int(offsets[token_start_index][0])
	    answer_end_offset = int(offsets[token_end_index][1])
	    answer_offset = (answer_start_offset, answer_end_offset)

	    # Decoding
	    answer_text = tokenizer.decode(pred_ids)
	    return {'answer_text': answer_text, 'answer_offset': answer_offset}


	## Title
	st.title('☁️ Bespin → QuestionAnswering')

	## Text
	# Markdown link to the model card; the host is ``huggingface.co`` with no trailing dot.
	st.write('[⚡bespin-global/klue-bert-base-aihub-mrc](https://huggingface.co/bespin-global/klue-bert-base-aihub-mrc) 모델 성능 테스트 페이지 입니다.')


	# Dropdown of example passages; the chosen one seeds the editable context box.
	context_option = st.selectbox(
	    label=' 📑 Select Context Examples.',
	    options=(
	        '스티븐 폴 스티브 잡스(영어: Steven Paul "Steve" Jobs, 1955년 2월 24일 ~ 2011년 10월 5일)는 미국의 기업인이었으며 애플의 전 CEO이자 공동 창립자이다. 2011년 10월 5일 췌장암으로 사망했다. 1976년 스티브 워즈니악, 로널드 웨인과 함께 애플을 공동 창업하고, 애플 2를 통해 개인용 컴퓨터를 대중화했다. 또한, GUI와 마우스의 가능성을 처음으로 내다보고 애플 리사와 매킨토시에서 이 기술을 도입하였다. 1986년 경영분쟁에 의해 애플에서 나온 이후 NeXT 컴퓨터를 창업하여 새로운 개념의 운영 체제를 개발했다. 1996년 애플이 NeXT를 인수하게 되면서 다시 애플로 돌아오게 되었고 1997년에는 임시 CEO로 애플을 다시 이끌게 되었으며 이후 다시금 애플을 혁신해 시장에서 성공을 거두게 이끌었다. 2001년 아이팟을 출시하여 음악 산업 전체를 뒤바꾸어 놓았다. 또한, 2007년 아이폰을 출시하면서 스마트폰 시장을 바꾸어 놓았고 2010년 아이패드를 출시함으로써 포스트PC 시대(Post-PC era)를 열었다. 스티브 잡스는 애니메이션 영화 《인크레더블》과 《토이 스토리》 등을 제작한 컴퓨터 애니메이션 제작사인 픽사의 소유주이자 CEO였다. 월트 디즈니 회사는 74억 달러어치의 자사 주식으로 이 회사를 구입하였다. 2006년 6월 이 거래가 완료되어 잡스는 이 거래를 통해 디즈니 지분의 7%를 소유한, 최대의 개인 주주이자 디즈니 이사회의 이사가 되었다. 한편 그는 2003년 무렵부터 췌장암으로 투병생활을 이어왔다. 그의 악화된 건강상태로 인하여 2011년 8월 24일 애플은 스티브 잡스가 최고경영책임자(CEO)를 사임하고 최고운영책임자(COO)인 팀 쿡이 새로운 CEO를 맡는다고 밝혔다. 잡스는 CEO직에서 물러나지만 이사회 의장직은 유지시키기로 했으나, 건강상태가 더욱 악화되어 사임 2개월도 지나지 않은 2011년 10월 5일 향년 56세의 나이로 사망했다.',
	        '비트코인은 2009년 사토시 나카모토[6]가 만든 가상화폐로, 통화를 발행하고 관리하는 중앙 장치가 존재하지 않는 구조를 가지고 있다. 대신, 비트코인의 거래는 P2P 기반 분산 데이터베이스에 의해 이루어지며, 공개 키 암호 방식 기반으로 거래를 수행한다. 비트코인은 공개성을 가지고 있다. 비트코인은 지갑 파일의 형태로 저장되며, 이 지갑에는 각각의 고유 주소가 부여되며, 그 주소를 기반으로 비트코인의 거래가 이루어진다. 비트코인은 1998년 웨이따이가 사이버펑크 메일링 리스트에 올린 암호통화(cryptocurrency)란 구상을 최초로 구현한 것 중의 하나이다.[7][8] 비트코인은 공개 키 암호 방식을 이용해 공개된 계정간에 거래를 한다. 모든 거래는 비공개적이나 거래의 기록은 남으며, 분산 데이터베이스에 저장된다. 분산된 시간서버로 일련의 작업증명(proof-of-work)을 하여 중복지출(double-spending)을 방지한다. 거래 기록은 모두 데이터베이스에 저장되어야 한다. 저장소 크기를 줄이기 위해 머클 트리(Merkle tree)가 사용된다.',
	    ),
	)

	# Free-text context, pre-filled with the selected example.
	context = st.text_area(
	    "Context.",
	    value=context_option,
	    height=300,
	    on_change=None,
	)


	# Choose the example questions that belong to the selected context passage.
	if '스티븐 폴 스티브 잡스' in context_option:
	    question_examples = (
	        '스티브 잡스가 누구야?',
	        '스티브 잡스는 애플로 돌아와서 어떻게 했어?',
	        '왜 애플을 나왔어?',
	        '스티브 잡스는 어떻게 다시 애플로 돌아오게 되었어?',
	        '픽사는 뭘 제작했어?',
	        '왜 팀 쿡을 새로운 CEO로 맡았어?',
	        '스티브 잡스는 언제 사망했어?',
	    )
	elif '비트코인' in context_option:
	    question_examples = (
	        '비트코인은 어떤 구조야?',
	        '비트코인은 어떻게 거래가 돼?',
	        '비트코인 지갑에는 뭐가 부여 돼?',
	        '공개된 계정간 거래 시 뭘 이용해?',
	        '모든 거래는 어떻게 남아?',
	        '머클 트리가 왜 사용 돼?',
	    )
	question_option = st.selectbox('💡 Select Question Examples.', question_examples)

	# Free-text question, pre-filled with the selected example.
	question = st.text_area("Question.", value=question_option, on_change=None)



	# Run the model on the current context/question when "Submit" is pressed.
	if st.button("Submit", key='question'):
	    try:
	        # Progress spinner
	        with st.spinner('Wait for it...'):
	            # Encoding
	            # NOTE(review): the pair is passed as (context, question) here, while
	            # predict_answer() passes (question, context) - confirm which order
	            # the model expects.
	            encodings = tokenizer(
	                context, question,
	                max_length=512,
	                truncation=True,
	                padding="max_length",
	                return_token_type_ids=False,
	            )
	            # Wrap each field in a list so every tensor gets a leading batch axis of 1.
	            encodings = {key: torch.tensor([val]) for key, val in encodings.items()}
	            input_ids = encodings["input_ids"]
	            attention_mask = encodings["attention_mask"]

	            # Predict (inference only, so skip gradient tracking).
	            with torch.no_grad():
	                pred = model(input_ids, attention_mask=attention_mask)

	            # argmax over the sequence axis yields one-element tensors; use plain ints.
	            token_start_index = int(pred.start_logits.argmax(dim=-1))
	            token_end_index = int(pred.end_logits.argmax(dim=-1))
	            pred_ids = input_ids[0][token_start_index: token_end_index + 1]

	            # Decoding
	            prediction = tokenizer.decode(pred_ids)

	            # answer
	            st.success(prediction)

	    except Exception as e:
	        # Top-level UI boundary: show any failure on the page.
	        st.error(e)