# Spaces: Runtime error
# (Status banner text captured together with the source; presumably not part of the program.)
import streamlit as st | |
import torch | |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer | |
# Pick whichever resource-caching decorator this Streamlit version provides:
# `st.cache_resource` on current releases, the legacy `st.cache` on older ones.
_cache_resource = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_resource
def get_model():
    """Load the fine-tuned MRC tokenizer and model from the Hugging Face Model Hub.

    The loader is cached because Streamlit re-executes the script on user
    interaction; without caching both objects would be loaded again each time.

    Returns:
        tuple: ``(tokenizer, model)`` as returned by the two ``from_pretrained`` calls.
    """
    HUGGINGFACE_MODEL_PATH = "bespin-global/klue-bert-base-aihub-mrc"
    tokenizer = AutoTokenizer.from_pretrained(HUGGINGFACE_MODEL_PATH)
    model = AutoModelForQuestionAnswering.from_pretrained(HUGGINGFACE_MODEL_PATH)
    return tokenizer, model


# Module-level handles used by the rest of the script.
tokenizer, model = get_model()
def predict_answer(qa_text_pair):
    """Predict the answer span for one question/context pair.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        qa_text_pair: Mapping with the keys ``'question'`` and ``'context'``.

    Returns:
        dict: ``'answer_text'`` is the decoded text of the predicted token span;
        ``'answer_offset'`` is a ``(start, end)`` tuple read from the tokenizer's
        offset mapping at the predicted start and end tokens. If the predicted
        end token precedes the start token, the span is empty and
        ``'answer_text'`` is the decoding of zero tokens.
    """
    # Encoding
    encodings = tokenizer(
        qa_text_pair['question'], qa_text_pair['context'],
        max_length=512,
        truncation=True,
        padding="max_length",
        return_token_type_ids=False,
        return_offsets_mapping=True
    )
    # No global `device` exists in this file, so place the inputs on whatever
    # device the model's parameters live on.
    device = next(model.parameters()).device
    encodings = {key: torch.tensor([val]).to(device) for key, val in encodings.items()}

    # Predict (inference only, so gradient tracking is disabled).
    with torch.no_grad():
        pred = model(encodings['input_ids'], attention_mask=encodings['attention_mask'])

    start_logits, end_logits = pred.start_logits, pred.end_logits
    # The batch holds exactly one sample; reduce the argmax results to plain ints.
    token_start_index = int(start_logits.argmax(dim=-1)[0])
    token_end_index = int(end_logits.argmax(dim=-1)[0])
    pred_ids = encodings['input_ids'][0][token_start_index: token_end_index + 1]

    # Answer start/end character offsets, taken from the offset mapping.
    offsets = encodings['offset_mapping'][0]
    answer_start_offset = int(offsets[token_start_index][0])
    answer_end_offset = int(offsets[token_end_index][1])
    answer_offset = (answer_start_offset, answer_end_offset)

    # Decoding
    answer_text = tokenizer.decode(pred_ids)  # text

    return {'answer_text': answer_text, 'answer_offset': answer_offset}
## Title
st.title('βοΈ Bespin β QuestionAnswering')

## Text
# The link target previously had a stray dot after the host ("huggingface.co./").
st.write('[β‘bespin-global/klue-bert-base-aihub-mrc](https://huggingface.co/bespin-global/klue-bert-base-aihub-mrc) λͺ¨λΈ μ±λ₯ ν μ€νΈ νμ΄μ§ μ λλ€.')

# Example contexts the user can pick from; the selection pre-fills the context box.
context_option = st.selectbox(
    ' π Select Context Examples.',
    (
        'μ€ν°λΈ ν΄ μ€ν°λΈ μ‘μ€(μμ΄: Steven Paul "Steve" Jobs, 1955λ 2μ 24μΌ ~ 2011λ 10μ 5μΌ)λ λ―Έκ΅μ κΈ°μ μΈμ΄μμΌλ©° μ νμ μ CEOμ΄μ 곡λ 창립μμ΄λ€. 2011λ 10μ 5μΌ μ·μ₯μμΌλ‘ μ¬λ§νλ€. 1976λ μ€ν°λΈ μμ¦λμ , λ‘λλ μ¨μΈκ³Ό ν¨κ» μ νμ 곡λ μ°½μ νκ³ , μ ν 2λ₯Ό ν΅ν΄ κ°μΈμ© μ»΄ν¨ν°λ₯Ό λμ€ννλ€. λν, GUIμ λ§μ°μ€μ κ°λ₯μ±μ μ²μμΌλ‘ λ΄λ€λ³΄κ³ μ ν 리μ¬μ 맀ν¨ν μμμ μ΄ κΈ°μ μ λμ νμλ€. 1986λ κ²½μλΆμμ μν΄ μ νμμ λμ¨ μ΄ν NeXT μ»΄ν¨ν°λ₯Ό μ°½μ νμ¬ μλ‘μ΄ κ°λ μ μ΄μ 체μ λ₯Ό κ°λ°νλ€. 1996λ μ νμ΄ NeXTλ₯Ό μΈμνκ² λλ©΄μ λ€μ μ νλ‘ λμμ€κ² λμκ³ 1997λ μλ μμ CEOλ‘ μ νμ λ€μ μ΄λκ² λμμΌλ©° μ΄ν λ€μκΈ μ νμ νμ ν΄ μμ₯μμ μ±κ³΅μ κ±°λκ² μ΄λμλ€. 2001λ μμ΄νμ μΆμνμ¬ μμ μ°μ μ 체λ₯Ό λ€λ°κΎΈμ΄ λμλ€. λν, 2007λ μμ΄ν°μ μΆμνλ©΄μ μ€λ§νΈν° μμ₯μ λ°κΎΈμ΄ λμκ³ 2010λ μμ΄ν¨λλ₯Ό μΆμν¨μΌλ‘μ¨ ν¬μ€νΈPC μλ(Post-PC era)λ₯Ό μ΄μλ€. μ€ν°λΈ μ‘μ€λ μ λλ©μ΄μ μν γμΈν¬λ λλΈγκ³Ό γν μ΄ μ€ν 리γ λ±μ μ μν μ»΄ν¨ν° μ λλ©μ΄μ μ μμ¬μΈ ν½μ¬μ μμ μ£Όμ΄μ CEOμλ€. μνΈ λμ¦λ νμ¬λ 74μ΅ λ¬λ¬μ΄μΉμ μμ¬ μ£ΌμμΌλ‘ μ΄ νμ¬λ₯Ό ꡬμ νμλ€. 2006λ 6μ μ΄ κ±°λκ° μλ£λμ΄ μ‘μ€λ μ΄ κ±°λλ₯Ό ν΅ν΄ λμ¦λ μ§λΆμ 7%λ₯Ό μμ ν, μ΅λμ κ°μΈ μ£Όμ£Όμ΄μ λμ¦λ μ΄μ¬νμ μ΄μ¬κ° λμλ€. ννΈ κ·Έλ 2003λ λ¬΄λ ΅λΆν° μ·μ₯μμΌλ‘ ν¬λ³μνμ μ΄μ΄μλ€. κ·Έμ μ νλ 건κ°μνλ‘ μΈνμ¬ 2011λ 8μ 24μΌ μ νμ μ€ν°λΈ μ‘μ€κ° μ΅κ³ κ²½μμ± μμ(CEO)λ₯Ό μ¬μνκ³ μ΅κ³ μ΄μμ± μμ(COO)μΈ ν μΏ‘μ΄ μλ‘μ΄ CEOλ₯Ό 맑λλ€κ³ λ°νλ€. μ‘μ€λ CEOμ§μμ λ¬Όλ¬λμ§λ§ μ΄μ¬ν μμ₯μ§μ μ μ§μν€κΈ°λ‘ νμΌλ, 건κ°μνκ° λμ± μ νλμ΄ μ¬μ 2κ°μλ μ§λμ§ μμ 2011λ 10μ 5μΌ ν₯λ 56μΈμ λμ΄λ‘ μ¬λ§νλ€.',
        'λΉνΈμ½μΈμ 2009λ μ¬ν μ λμΉ΄λͺ¨ν [6]κ° λ§λ κ°μννλ‘, ν΅νλ₯Ό λ°ννκ³ κ΄λ¦¬νλ μ€μ μ₯μΉκ° μ‘΄μ¬νμ§ μλ ꡬ쑰λ₯Ό κ°μ§κ³ μλ€. λμ , λΉνΈμ½μΈμ κ±°λλ P2P κΈ°λ° λΆμ° λ°μ΄ν°λ² μ΄μ€μ μν΄ μ΄λ£¨μ΄μ§λ©°, κ³΅κ° ν€ μνΈ λ°©μ κΈ°λ°μΌλ‘ κ±°λλ₯Ό μννλ€. λΉνΈμ½μΈμ 곡κ°μ±μ κ°μ§κ³ μλ€. λΉνΈμ½μΈμ μ§κ° νμΌμ ννλ‘ μ μ₯λλ©°, μ΄ μ§κ°μλ κ°κ°μ κ³ μ μ£Όμκ° λΆμ¬λλ©°, κ·Έ μ£Όμλ₯Ό κΈ°λ°μΌλ‘ λΉνΈμ½μΈμ κ±°λκ° μ΄λ£¨μ΄μ§λ€. λΉνΈμ½μΈμ 1998λ μ¨μ΄λ°μ΄κ° μ¬μ΄λ²νν¬ λ©μΌλ§ 리μ€νΈμ μ¬λ¦° μνΈν΅ν(cryptocurrency)λ ꡬμμ μ΅μ΄λ‘ ꡬνν κ² μ€μ νλμ΄λ€.[7][8] λΉνΈμ½μΈμ κ³΅κ° ν€ μνΈ λ°©μμ μ΄μ©ν΄ 곡κ°λ κ³μ κ°μ κ±°λλ₯Ό νλ€. λͺ¨λ κ±°λλ λΉκ³΅κ°μ μ΄λ κ±°λμ κΈ°λ‘μ λ¨μΌλ©°, λΆμ° λ°μ΄ν°λ² μ΄μ€μ μ μ₯λλ€. λΆμ°λ μκ°μλ²λ‘ μΌλ ¨μ μμ μ¦λͺ (proof-of-work)μ νμ¬ μ€λ³΅μ§μΆ(double-spending)μ λ°©μ§νλ€. κ±°λ κΈ°λ‘μ λͺ¨λ λ°μ΄ν°λ² μ΄μ€μ μ μ₯λμ΄μΌ νλ€. μ μ₯μ ν¬κΈ°λ₯Ό μ€μ΄κΈ° μν΄ λ¨Έν΄ νΈλ¦¬(Merkle tree)κ° μ¬μ©λλ€.',
    ),
)
# Text Input: editable context, pre-filled with the selected example.
context = st.text_area("Context.", value=context_option, height=300, on_change=None)

# Choose the example questions that belong to the selected context example.
# The membership tests look for a marker substring of each example context.
if 'μ€ν°λΈ ν΄ μ€ν°λΈ μ‘μ€' in context_option:
    example_questions = (
        'μ€ν°λΈ μ‘μ€κ° λꡬμΌ?', 'μ€ν°λΈ μ‘μ€λ μ νλ‘ λμμμ μ΄λ»κ² νμ΄?', 'μ μ νμ λμμ΄?', 'μ€ν°λΈ μ‘μ€λ μ΄λ»κ² λ€μ μ νλ‘ λμμ€κ² λμμ΄?', 'ν½μ¬λ λ μ μνμ΄?', 'μ ν μΏ‘μ μλ‘μ΄ CEOλ‘ λ§‘μμ΄?', 'μ€ν°λΈ μ‘μ€λ μΈμ μ¬λ§νμ΄?'
    )
elif 'λΉνΈμ½μΈ' in context_option:
    example_questions = (
        'λΉνΈμ½μΈμ μ΄λ€ ꡬ쑰μΌ?', 'λΉνΈμ½μΈμ μ΄λ»κ² κ±°λκ° λΌ?', 'λΉνΈμ½μΈ μ§κ°μλ λκ° λΆμ¬ λΌ?', '곡κ°λ κ³μ κ° κ±°λ μ λ μ΄μ©ν΄?', 'λͺ¨λ κ±°λλ μ΄λ»κ² λ¨μ?', 'λ¨Έν΄ νΈλ¦¬κ° μ μ¬μ© λΌ?'
    )
else:
    # No examples are known for this context; keep the name bound so the
    # widgets below never hit a NameError.
    example_questions = ()

question_option = st.selectbox('π‘ Select Question Examples.', example_questions)

# Text Area: editable question, pre-filled with the selected example
# (empty when no example question is available).
question = st.text_area("Question.", value=question_option or "", on_change=None)
# Run the model when the user presses "Submit" and show the predicted answer.
if st.button("Submit", key='question'):
    try:
        # Progress spinner
        with st.spinner('Wait for it...'):
            # Encoding: question first, then context, matching the argument
            # order `predict_answer` uses in this file.
            encodings = tokenizer(
                question, context,
                max_length=512,
                truncation=True,
                padding="max_length",
                return_token_type_ids=False
            )
            # Wrap each field in a list to add a batch dimension of size 1.
            encodings = {key: torch.tensor([val]) for key, val in encodings.items()}
            input_ids = encodings["input_ids"]
            attention_mask = encodings["attention_mask"]

            # Predict (inference only, so gradient tracking is disabled).
            with torch.no_grad():
                pred = model(input_ids, attention_mask=attention_mask)

            start_logits, end_logits = pred.start_logits, pred.end_logits
            # Single-sample batch: reduce the argmax results to plain ints.
            token_start_index = int(start_logits.argmax(dim=-1)[0])
            token_end_index = int(end_logits.argmax(dim=-1)[0])
            pred_ids = input_ids[0][token_start_index: token_end_index + 1]

            # Decoding
            prediction = tokenizer.decode(pred_ids)

            # answer
            st.success(prediction)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user instead of crashing the page.
        st.error(e)