File size: 7,738 Bytes
bfe4f62
 
 
 
 
 
 
 
 
 
d01bebb
bfe4f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d01bebb
 
bfe4f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d01bebb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfe4f62
 
d01bebb
bfe4f62
 
d01bebb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import streamlit as st
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer


@st.cache(allow_output_mutation=True)
def get_model():
    """Load the fine-tuned Korean MRC model and tokenizer from the HuggingFace Hub.

    Cached by Streamlit so the download/initialization happens only once,
    not on every script rerun.

    Returns:
        tuple: (tokenizer, model) for "bespin-global/klue-bert-base-aihub-mrc".
    """
    model_path = "bespin-global/klue-bert-base-aihub-mrc"
    return (
        AutoTokenizer.from_pretrained(model_path),
        AutoModelForQuestionAnswering.from_pretrained(model_path),
    )

# Materialize the (cached) tokenizer/model once at module level so every
# Streamlit rerun below reuses the same objects.
tokenizer, model = get_model()


def predict_answer(qa_text_pair):
    """Run extractive QA on a {'question': str, 'context': str} pair.

    Args:
        qa_text_pair: dict with keys 'question' and 'context' (plain strings).

    Returns:
        dict: {'answer_text': decoded answer span (str),
               'answer_offset': (start, end) character offsets into the context}
    """
    # Encoding. offset_mapping is requested so the predicted token span can be
    # mapped back to character offsets in the original context string.
    encodings = tokenizer(
        qa_text_pair['question'], qa_text_pair['context'], 
        max_length=512, 
        truncation=True,
        padding="max_length", 
        return_token_type_ids=False,
        return_offsets_mapping=True
    )
    # BUG FIX: the original referenced an undefined global `device`, raising
    # NameError on every call. Use the device the model actually lives on.
    device = next(model.parameters()).device
    encodings = {key: torch.tensor([val]).to(device) for key, val in encodings.items()}

    # Predict (inference only — no autograd graph needed).
    with torch.no_grad():
        pred = model(encodings['input_ids'], encodings['attention_mask'])
        start_logits, end_logits = pred.start_logits, pred.end_logits
        token_start_index, token_end_index = start_logits.argmax(dim=-1), end_logits.argmax(dim=-1)
        pred_ids = encodings['input_ids'][0][token_start_index: token_end_index + 1]

        # Answer start/end character offsets within the context.
        answer_start_offset = int(encodings['offset_mapping'][0][token_start_index][0][0])
        answer_end_offset = int(encodings['offset_mapping'][0][token_end_index][0][1])
        answer_offset = (answer_start_offset, answer_end_offset)

    # Decoding: answer span back to text.
    answer_text = tokenizer.decode(pred_ids)
    del encodings  # free the padded 512-token tensors promptly
    return {'answer_text': answer_text, 'answer_offset': answer_offset}


## Title
st.title('☁️ Bespin β†’ QuestionAnswering')

## Text: link to the model card being demoed (Korean caption).
st.write('[⚑bespin-global/klue-bert-base-aihub-mrc](https://huggingface.co./bespin-global/klue-bert-base-aihub-mrc) λͺ¨λΈ μ„±λŠ₯ ν…ŒμŠ€νŠΈ νŽ˜μ΄μ§€ μž…λ‹ˆλ‹€.')


# Two canned Korean passages (Steve Jobs biography / Bitcoin overview) the
# user can pick as the reading-comprehension context.
context_option = st.selectbox(' πŸ“‘ Select Context Examples.',
    (
        'μŠ€ν‹°λΈ 폴 μŠ€ν‹°λΈŒ 작슀(μ˜μ–΄: Steven Paul "Steve" Jobs, 1955λ…„ 2μ›” 24일 ~ 2011λ…„ 10μ›” 5일)λŠ” 미ꡭ의 κΈ°μ—…μΈμ΄μ—ˆμœΌλ©° μ• ν”Œμ˜ μ „ CEO이자 곡동 μ°½λ¦½μžμ΄λ‹€. 2011λ…„ 10μ›” 5일 췌μž₯μ•”μœΌλ‘œ μ‚¬λ§ν–ˆλ‹€. 1976λ…„ μŠ€ν‹°λΈŒ μ›Œμ¦ˆλ‹ˆμ•…, λ‘œλ„λ“œ 웨인과 ν•¨κ»˜ μ• ν”Œμ„ 곡동 μ°½μ—…ν•˜κ³ , μ• ν”Œ 2λ₯Ό 톡해 개인용 컴퓨터λ₯Ό λŒ€μ€‘ν™”ν–ˆλ‹€. λ˜ν•œ, GUI와 마우슀의 κ°€λŠ₯성을 처음으둜 내닀보고 μ• ν”Œ 리사와 λ§€ν‚¨ν† μ‹œμ—μ„œ 이 κΈ°μˆ μ„ λ„μž…ν•˜μ˜€λ‹€. 1986λ…„ κ²½μ˜λΆ„μŸμ— μ˜ν•΄ μ• ν”Œμ—μ„œ λ‚˜μ˜¨ 이후 NeXT 컴퓨터λ₯Ό μ°½μ—…ν•˜μ—¬ μƒˆλ‘œμš΄ κ°œλ…μ˜ 운영 체제λ₯Ό κ°œλ°œν–ˆλ‹€. 1996λ…„ μ• ν”Œμ΄ NeXTλ₯Ό μΈμˆ˜ν•˜κ²Œ λ˜λ©΄μ„œ λ‹€μ‹œ μ• ν”Œλ‘œ λŒμ•„μ˜€κ²Œ λ˜μ—ˆκ³  1997λ…„μ—λŠ” μž„μ‹œ CEO둜 μ• ν”Œμ„ λ‹€μ‹œ 이끌게 λ˜μ—ˆμœΌλ©° 이후 λ‹€μ‹œκΈˆ μ• ν”Œμ„ ν˜μ‹ ν•΄ μ‹œμž₯μ—μ„œ 성곡을 κ±°λ‘κ²Œ μ΄λŒμ—ˆλ‹€. 2001λ…„ μ•„μ΄νŒŸμ„ μΆœμ‹œν•˜μ—¬ μŒμ•… μ‚°μ—… 전체λ₯Ό λ’€λ°”κΎΈμ–΄ λ†“μ•˜λ‹€. λ˜ν•œ, 2007λ…„ 아이폰을 μΆœμ‹œν•˜λ©΄μ„œ 슀마트폰 μ‹œμž₯을 λ°”κΎΈμ–΄ λ†“μ•˜κ³  2010λ…„ μ•„μ΄νŒ¨λ“œλ₯Ό μΆœμ‹œν•¨μœΌλ‘œμ¨ 포슀트PC μ‹œλŒ€(Post-PC era)λ₯Ό μ—΄μ—ˆλ‹€. μŠ€ν‹°λΈŒ μž‘μŠ€λŠ” μ• λ‹ˆλ©”μ΄μ…˜ μ˜ν™” γ€ŠμΈν¬λ ˆλ”λΈ”γ€‹κ³Ό γ€Šν† μ΄ μŠ€ν† λ¦¬γ€‹ 등을 μ œμž‘ν•œ 컴퓨터 μ• λ‹ˆλ©”μ΄μ…˜ μ œμž‘μ‚¬μΈ ν”½μ‚¬μ˜ μ†Œμœ μ£Όμ΄μž CEOμ˜€λ‹€. μ›”νŠΈ λ””μ¦ˆλ‹ˆ νšŒμ‚¬λŠ” 74μ–΅ λ‹¬λŸ¬μ–΄μΉ˜μ˜ μžμ‚¬ μ£Όμ‹μœΌλ‘œ 이 νšŒμ‚¬λ₯Ό κ΅¬μž…ν•˜μ˜€λ‹€. 2006λ…„ 6μ›” 이 κ±°λž˜κ°€ μ™„λ£Œλ˜μ–΄ μž‘μŠ€λŠ” 이 거래λ₯Ό 톡해 λ””μ¦ˆλ‹ˆ μ§€λΆ„μ˜ 7%λ₯Ό μ†Œμœ ν•œ, μ΅œλŒ€μ˜ 개인 주주이자 λ””μ¦ˆλ‹ˆ μ΄μ‚¬νšŒμ˜ 이사가 λ˜μ—ˆλ‹€. ν•œνŽΈ κ·ΈλŠ” 2003λ…„ 무렡뢀터 췌μž₯μ•”μœΌλ‘œ νˆ¬λ³‘μƒν™œμ„ 이어왔닀. 그의 μ•…ν™”λœ κ±΄κ°•μƒνƒœλ‘œ μΈν•˜μ—¬ 2011λ…„ 8μ›” 24일 μ• ν”Œμ€ μŠ€ν‹°λΈŒ μž‘μŠ€κ°€ μ΅œκ³ κ²½μ˜μ±…μž„μž(CEO)λ₯Ό μ‚¬μž„ν•˜κ³  μ΅œκ³ μš΄μ˜μ±…μž„μž(COO)인 νŒ€ 쿑이 μƒˆλ‘œμš΄ CEOλ₯Ό λ§‘λŠ”λ‹€κ³  λ°ν˜”λ‹€. μž‘μŠ€λŠ” CEOμ§μ—μ„œ λ¬ΌλŸ¬λ‚˜μ§€λ§Œ μ΄μ‚¬νšŒ 의μž₯직은 μœ μ§€μ‹œν‚€κΈ°λ‘œ ν–ˆμœΌλ‚˜, κ±΄κ°•μƒνƒœκ°€ λ”μš± μ•…ν™”λ˜μ–΄ μ‚¬μž„ 2κ°œμ›”λ„ μ§€λ‚˜μ§€ μ•Šμ€ 2011λ…„ 10μ›” 5일 ν–₯λ…„ 56μ„Έμ˜ λ‚˜μ΄λ‘œ μ‚¬λ§ν–ˆλ‹€.',
        'λΉ„νŠΈμ½”μΈμ€ 2009λ…„ μ‚¬ν† μ‹œ λ‚˜μΉ΄λͺ¨ν† [6]κ°€ λ§Œλ“  κ°€μƒν™”νλ‘œ, 톡화λ₯Ό λ°œν–‰ν•˜κ³  κ΄€λ¦¬ν•˜λŠ” 쀑앙 μž₯μΉ˜κ°€ μ‘΄μž¬ν•˜μ§€ μ•ŠλŠ” ꡬ쑰λ₯Ό 가지고 μžˆλ‹€. λŒ€μ‹ , λΉ„νŠΈμ½”μΈμ˜ κ±°λž˜λŠ” P2P 기반 λΆ„μ‚° λ°μ΄ν„°λ² μ΄μŠ€μ— μ˜ν•΄ 이루어지며, 곡개 ν‚€ μ•”ν˜Έ 방식 기반으둜 거래λ₯Ό μˆ˜ν–‰ν•œλ‹€. λΉ„νŠΈμ½”μΈμ€ κ³΅κ°œμ„±μ„ 가지고 μžˆλ‹€. λΉ„νŠΈμ½”μΈμ€ 지갑 파일의 ν˜•νƒœλ‘œ μ €μž₯되며, 이 μ§€κ°‘μ—λŠ” 각각의 고유 μ£Όμ†Œκ°€ λΆ€μ—¬λ˜λ©°, κ·Έ μ£Όμ†Œλ₯Ό 기반으둜 λΉ„νŠΈμ½”μΈμ˜ κ±°λž˜κ°€ 이루어진닀. λΉ„νŠΈμ½”μΈμ€ 1998λ…„ 웨이따이가 μ‚¬μ΄λ²„νŽ‘ν¬ 메일링 λ¦¬μŠ€νŠΈμ— 올린 μ•”ν˜Έν†΅ν™”(cryptocurrency)λž€ ꡬ상을 졜초둜 κ΅¬ν˜„ν•œ 것 μ€‘μ˜ ν•˜λ‚˜μ΄λ‹€.[7][8] λΉ„νŠΈμ½”μΈμ€ 곡개 ν‚€ μ•”ν˜Έ 방식을 μ΄μš©ν•΄ 곡개된 계정간에 거래λ₯Ό ν•œλ‹€. λͺ¨λ“  κ±°λž˜λŠ” λΉ„κ³΅κ°œμ μ΄λ‚˜ 거래의 기둝은 λ‚¨μœΌλ©°, λΆ„μ‚° λ°μ΄ν„°λ² μ΄μŠ€μ— μ €μž₯λœλ‹€. λΆ„μ‚°λœ μ‹œκ°„μ„œλ²„λ‘œ 일련의 μž‘μ—…μ¦λͺ…(proof-of-work)을 ν•˜μ—¬ μ€‘λ³΅μ§€μΆœ(double-spending)을 λ°©μ§€ν•œλ‹€. 거래 기둝은 λͺ¨λ‘ λ°μ΄ν„°λ² μ΄μŠ€μ— μ €μž₯λ˜μ–΄μ•Ό ν•œλ‹€. μ €μž₯μ†Œ 크기λ₯Ό 쀄이기 μœ„ν•΄ 머클 트리(Merkle tree)κ°€ μ‚¬μš©λœλ‹€.'
    )   
)
# Text Input: the chosen example is shown in an editable text area before submission.
context = st.text_area("Context.", value=context_option, height=300, on_change=None) # placeholder="Please input some context..",


# Offer example questions matched to whichever canned passage was selected.
# NOTE: context_option always holds one of the two selectbox passages, so
# exactly one branch runs and question_option is bound before its use below.
if 'μŠ€ν‹°λΈ 폴 μŠ€ν‹°λΈŒ 작슀' in context_option:
    question_option = st.selectbox('πŸ’‘ Select Question Examples.',
        (
            'μŠ€ν‹°λΈŒ μž‘μŠ€κ°€ λˆ„κ΅¬μ•Ό?', 'μŠ€ν‹°λΈŒ μž‘μŠ€λŠ” μ• ν”Œλ‘œ λŒμ•„μ™€μ„œ μ–΄λ–»κ²Œ ν–ˆμ–΄?', 'μ™œ μ• ν”Œμ„ λ‚˜μ™”μ–΄?', 'μŠ€ν‹°λΈŒ μž‘μŠ€λŠ” μ–΄λ–»κ²Œ λ‹€μ‹œ μ• ν”Œλ‘œ λŒμ•„μ˜€κ²Œ λ˜μ—ˆμ–΄?', 'ν”½μ‚¬λŠ” 뭘 μ œμž‘ν–ˆμ–΄?', 'μ™œ  νŒ€ 쿑을 μƒˆλ‘œμš΄ CEO둜 λ§‘μ•˜μ–΄?', 'μŠ€ν‹°λΈŒ μž‘μŠ€λŠ” μ–Έμ œ μ‚¬λ§ν–ˆμ–΄?'
        )
    )
elif 'λΉ„νŠΈμ½”μΈ' in context_option:
    question_option = st.selectbox('πŸ’‘ Select Question Examples.',
        (
            'λΉ„νŠΈμ½”μΈμ€ μ–΄λ–€ ꡬ쑰야?', 'λΉ„νŠΈμ½”μΈμ€ μ–΄λ–»κ²Œ κ±°λž˜κ°€ 돼?', 'λΉ„νŠΈμ½”μΈ μ§€κ°‘μ—λŠ” 뭐가 λΆ€μ—¬ 돼?', '곡개된 계정간 거래 μ‹œ 뭘 μ΄μš©ν•΄?', 'λͺ¨λ“  κ±°λž˜λŠ” μ–΄λ–»κ²Œ 남아?', '머클 νŠΈλ¦¬κ°€ μ™œ μ‚¬μš© 돼?'
        )
    )

# Text Area: the selected example question, editable before submission.
question = st.text_area("Question.", value=question_option, on_change=None) # placeholder="Please input your question.."



if st.button("Submit", key='question'):
    try:
        # Progress spinner while tokenization + inference run.
        with st.spinner('Wait for it...'):
            # Encoding. BUG FIX: pass (question, context) — not (context, question) —
            # to match predict_answer() above and the standard BERT-QA pair order;
            # with the context first, truncation=True could cut the question away
            # entirely on long contexts.
            encodings = tokenizer(question, context, 
                              max_length=512, 
                              truncation=True,
                              padding="max_length", 
                              return_token_type_ids=False
                              )
            encodings = {key: torch.tensor([val]) for key, val in encodings.items()}             
            input_ids = encodings["input_ids"]
            attention_mask = encodings["attention_mask"]

            # Predict under no_grad: pure inference, no autograd graph needed
            # (consistent with predict_answer above).
            with torch.no_grad():
                pred = model(input_ids, attention_mask=attention_mask)

                start_logits, end_logits = pred.start_logits, pred.end_logits
                token_start_index, token_end_index = start_logits.argmax(dim=-1), end_logits.argmax(dim=-1)
                pred_ids = input_ids[0][token_start_index: token_end_index + 1]

            # Decoding: predicted token span back to text.
            prediction = tokenizer.decode(pred_ids)

            # Show the extracted answer in the UI.
            st.success(prediction)

    except Exception as e:
        # Top-level UI boundary: surface the error in the app instead of crashing.
        st.error(e)