File size: 7,085 Bytes
bfe4f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb82652
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer


@st.cache(allow_output_mutation=True)
def get_model():
    # Load fine-tuned MRC model by HuggingFace Model Hub
    HUGGINGFACE_MODEL_PATH = "bespin-global/klue-bert-base-aihub-mrc"
    tokenizer = AutoTokenizer.from_pretrained(HUGGINGFACE_MODEL_PATH)
    model = AutoModelForQuestionAnswering.from_pretrained(HUGGINGFACE_MODEL_PATH).to(device)

    return tokenizer, model

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
tokenizer, model = get_model()


def predict_answer(qa_text_pair):
    # Encoding
    encodings = tokenizer(
        qa_text_pair['question'], qa_text_pair['context'], 
        max_length=512, 
        truncation=True,
        padding="max_length", 
        return_token_type_ids=False,
        return_offsets_mapping=True
    )
    encodings = {key: torch.tensor([val]).to(device) for key, val in encodings.items()}

    # Predict
    with torch.no_grad():
        pred = model(encodings['input_ids'], encodings['attention_mask'])
        start_logits, end_logits = pred.start_logits, pred.end_logits
        token_start_index, token_end_index = start_logits.argmax(dim=-1), end_logits.argmax(dim=-1)
        pred_ids = encodings['input_ids'][0][token_start_index: token_end_index + 1]

        # Answer start/end offset of context.
        answer_start_offset = int(encodings['offset_mapping'][0][token_start_index][0][0])
        answer_end_offset = int(encodings['offset_mapping'][0][token_end_index][0][1])
        answer_offset = (answer_start_offset, answer_end_offset)

    # Decoding
    answer_text = tokenizer.decode(pred_ids) # text
    del encodings 
    return {'answer_text':answer_text, 'answer_offset':answer_offset}


## Title
st.title('โ˜๏ธ Bespin โ†’ QuestionAnswering')

## Text
st.write('[โšกbespin-global/klue-bert-base-aihub-mrc](https://huggingface.co./bespin-global/klue-bert-base-aihub-mrc) ๋ชจ๋ธ ์„ฑ๋Šฅ ํ…Œ์ŠคํŠธ ํŽ˜์ด์ง€ ์ž…๋‹ˆ๋‹ค.')


context_option = st.selectbox(' ๐Ÿ“‘ Select Context Examples.',
    (
        '์Šคํ‹ฐ๋ธ ํด ์Šคํ‹ฐ๋ธŒ ์žก์Šค(์˜์–ด:ย Steven Paul "Steve" Jobs,ย 1955๋…„ย 2์›” 24์ผย ~ย 2011๋…„ย 10์›” 5์ผ)๋Š”ย ๋ฏธ๊ตญ์˜ย ๊ธฐ์—…์ธ์ด์—ˆ์œผ๋ฉฐย ์• ํ”Œ์˜ ์ „ CEO์ด์ž ๊ณต๋™ ์ฐฝ๋ฆฝ์ž์ด๋‹ค.ย 2011๋…„ย 10์›” 5์ผย ์ทŒ์žฅ์•”์œผ๋กœ ์‚ฌ๋งํ–ˆ๋‹ค. 1976๋…„ย ์Šคํ‹ฐ๋ธŒ ์›Œ์ฆˆ๋‹ˆ์•…,ย ๋กœ๋„๋“œ ์›จ์ธ๊ณผ ํ•จ๊ป˜ย ์• ํ”Œ์„ ๊ณต๋™ ์ฐฝ์—…ํ•˜๊ณ ,ย ์• ํ”Œ 2๋ฅผ ํ†ตํ•ดย ๊ฐœ์ธ์šฉ ์ปดํ“จํ„ฐ๋ฅผ ๋Œ€์ค‘ํ™”ํ–ˆ๋‹ค. ๋˜ํ•œ,ย GUI์™€ย ๋งˆ์šฐ์Šค์˜ ๊ฐ€๋Šฅ์„ฑ์„ ์ฒ˜์Œ์œผ๋กœ ๋‚ด๋‹ค๋ณด๊ณ ย ์• ํ”Œ ๋ฆฌ์‚ฌ์™€ย ๋งคํ‚จํ† ์‹œ์—์„œ ์ด ๊ธฐ์ˆ ์„ ๋„์ž…ํ•˜์˜€๋‹ค.ย 1986๋…„ย ๊ฒฝ์˜๋ถ„์Ÿ์— ์˜ํ•ด ์• ํ”Œ์—์„œ ๋‚˜์˜จ ์ดํ›„ย NeXTย ์ปดํ“จํ„ฐ๋ฅผ ์ฐฝ์—…ํ•˜์—ฌ ์ƒˆ๋กœ์šด ๊ฐœ๋…์˜ ์šด์˜ ์ฒด์ œ๋ฅผ ๊ฐœ๋ฐœํ–ˆ๋‹ค.ย 1996๋…„ย ์• ํ”Œ์ด NeXT๋ฅผ ์ธ์ˆ˜ํ•˜๊ฒŒ ๋˜๋ฉด์„œ ๋‹ค์‹œ ์• ํ”Œ๋กœ ๋Œ์•„์˜ค๊ฒŒ ๋˜์—ˆ๊ณ  1997๋…„์—๋Š” ์ž„์‹œ CEO๋กœ ์• ํ”Œ์„ ๋‹ค์‹œ ์ด๋Œ๊ฒŒ ๋˜์—ˆ์œผ๋ฉฐ ์ดํ›„ ๋‹ค์‹œ๊ธˆ ์• ํ”Œ์„ ํ˜์‹ ํ•ด ์‹œ์žฅ์—์„œ ์„ฑ๊ณต์„ ๊ฑฐ๋‘๊ฒŒ ์ด๋Œ์—ˆ๋‹ค. 2001๋…„ย ์•„์ดํŒŸ์„ ์ถœ์‹œํ•˜์—ฌ ์Œ์•… ์‚ฐ์—… ์ „์ฒด๋ฅผ ๋’ค๋ฐ”๊พธ์–ด ๋†“์•˜๋‹ค. ๋˜ํ•œ, 2007๋…„ย ์•„์ดํฐ์„ ์ถœ์‹œํ•˜๋ฉด์„œ ์Šค๋งˆํŠธํฐ ์‹œ์žฅ์„ ๋ฐ”๊พธ์–ด ๋†“์•˜๊ณ  2010๋…„ย ์•„์ดํŒจ๋“œ๋ฅผ ์ถœ์‹œํ•จ์œผ๋กœ์จย ํฌ์ŠคํŠธPC ์‹œ๋Œ€(Post-PC era)๋ฅผ ์—ด์—ˆ๋‹ค. ์Šคํ‹ฐ๋ธŒ ์žก์Šค๋Š” ์• ๋‹ˆ๋ฉ”์ด์…˜ ์˜ํ™” ใ€Š์ธํฌ๋ ˆ๋”๋ธ”ใ€‹๊ณผ ใ€Šํ† ์ด ์Šคํ† ๋ฆฌใ€‹ ๋“ฑ์„ ์ œ์ž‘ํ•œย ์ปดํ“จํ„ฐ ์• ๋‹ˆ๋ฉ”์ด์…˜ย ์ œ์ž‘์‚ฌ์ธย ํ”ฝ์‚ฌ์˜ ์†Œ์œ ์ฃผ์ด์žย CEO์˜€๋‹ค.ย ์›”ํŠธ ๋””์ฆˆ๋‹ˆ ํšŒ์‚ฌ๋Š” 74์–ต ๋‹ฌ๋Ÿฌ์–ด์น˜์˜ ์ž์‚ฌ ์ฃผ์‹์œผ๋กœ ์ด ํšŒ์‚ฌ๋ฅผ ๊ตฌ์ž…ํ•˜์˜€๋‹ค.ย 2006๋…„ย 6์›” ์ด ๊ฑฐ๋ž˜๊ฐ€ ์™„๋ฃŒ๋˜์–ด ์žก์Šค๋Š” ์ด ๊ฑฐ๋ž˜๋ฅผ ํ†ตํ•ด ๋””์ฆˆ๋‹ˆ ์ง€๋ถ„์˜ 7%๋ฅผ ์†Œ์œ ํ•œ, ์ตœ๋Œ€์˜ ๊ฐœ์ธ ์ฃผ์ฃผ์ด์ž ๋””์ฆˆ๋‹ˆ ์ด์‚ฌํšŒ์˜ ์ด์‚ฌ๊ฐ€ ๋˜์—ˆ๋‹ค. ํ•œํŽธ ๊ทธ๋Š”ย 2003๋…„ย ๋ฌด๋ ต๋ถ€ํ„ฐย ์ทŒ์žฅ์•”์œผ๋กœ ํˆฌ๋ณ‘์ƒํ™œ์„ ์ด์–ด์™”๋‹ค. ๊ทธ์˜ ์•…ํ™”๋œ ๊ฑด๊ฐ•์ƒํƒœ๋กœ ์ธํ•˜์—ฌย 2011๋…„ย 8์›” 24์ผ ์• ํ”Œ์€ ์Šคํ‹ฐ๋ธŒ ์žก์Šค๊ฐ€ ์ตœ๊ณ ๊ฒฝ์˜์ฑ…์ž„์ž(CEO)๋ฅผ ์‚ฌ์ž„ํ•˜๊ณ  ์ตœ๊ณ ์šด์˜์ฑ…์ž„์ž(COO)์ธย ํŒ€ ์ฟก์ด ์ƒˆ๋กœ์šด CEO๋ฅผ ๋งก๋Š”๋‹ค๊ณ  ๋ฐํ˜”๋‹ค. ์žก์Šค๋Š” CEO์ง์—์„œ ๋ฌผ๋Ÿฌ๋‚˜์ง€๋งŒ ์ด์‚ฌํšŒ ์˜์žฅ์ง์€ ์œ ์ง€์‹œํ‚ค๊ธฐ๋กœ ํ–ˆ์œผ๋‚˜, ๊ฑด๊ฐ•์ƒํƒœ๊ฐ€ ๋”์šฑ ์•…ํ™”๋˜์–ด ์‚ฌ์ž„ 2๊ฐœ์›”๋„ ์ง€๋‚˜์ง€ ์•Š์€ย 2011๋…„ย 10์›” 5์ผย ํ–ฅ๋…„ 56์„ธ์˜ ๋‚˜์ด๋กœ ์‚ฌ๋งํ–ˆ๋‹ค.',
        '๋น„ํŠธ์ฝ”์ธ์€ย 2009๋…„ย ์‚ฌํ† ์‹œ ๋‚˜์นด๋ชจํ† [6]๊ฐ€ ๋งŒ๋“ ย ๊ฐ€์ƒํ™”ํ๋กœ, ํ†ตํ™”๋ฅผ ๋ฐœํ–‰ํ•˜๊ณ  ๊ด€๋ฆฌํ•˜๋Š” ์ค‘์•™ ์žฅ์น˜๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š๋Š” ๊ตฌ์กฐ๋ฅผ ๊ฐ€์ง€๊ณ  ์žˆ๋‹ค. ๋Œ€์‹ , ๋น„ํŠธ์ฝ”์ธ์˜ ๊ฑฐ๋ž˜๋Š”ย P2Pย ๊ธฐ๋ฐ˜ย ๋ถ„์‚ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์— ์˜ํ•ด ์ด๋ฃจ์–ด์ง€๋ฉฐ,ย ๊ณต๊ฐœ ํ‚ค ์•”ํ˜ธ ๋ฐฉ์‹ย ๊ธฐ๋ฐ˜์œผ๋กœ ๊ฑฐ๋ž˜๋ฅผ ์ˆ˜ํ–‰ํ•œ๋‹ค. ๋น„ํŠธ์ฝ”์ธ์€ ๊ณต๊ฐœ์„ฑ์„ ๊ฐ€์ง€๊ณ  ์žˆ๋‹ค. ๋น„ํŠธ์ฝ”์ธ์€ ์ง€๊ฐ‘ ํŒŒ์ผ์˜ ํ˜•ํƒœ๋กœ ์ €์žฅ๋˜๋ฉฐ, ์ด ์ง€๊ฐ‘์—๋Š” ๊ฐ๊ฐ์˜ ๊ณ ์œ  ์ฃผ์†Œ๊ฐ€ ๋ถ€์—ฌ๋˜๋ฉฐ, ๊ทธ ์ฃผ์†Œ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ๋น„ํŠธ์ฝ”์ธ์˜ ๊ฑฐ๋ž˜๊ฐ€ ์ด๋ฃจ์–ด์ง„๋‹ค. ๋น„ํŠธ์ฝ”์ธ์€ 1998๋…„ ์›จ์ด๋”ฐ์ด๊ฐ€ ์‚ฌ์ด๋ฒ„ํŽ‘ํฌ ๋ฉ”์ผ๋ง ๋ฆฌ์ŠคํŠธ์— ์˜ฌ๋ฆฐย ์•”ํ˜ธํ†ตํ™”(cryptocurrency)๋ž€ ๊ตฌ์ƒ์„ ์ตœ์ดˆ๋กœ ๊ตฌํ˜„ํ•œ ๊ฒƒ ์ค‘์˜ ํ•˜๋‚˜์ด๋‹ค.[7][8] ๋น„ํŠธ์ฝ”์ธ์€ย ๊ณต๊ฐœ ํ‚ค ์•”ํ˜ธ ๋ฐฉ์‹์„ ์ด์šฉํ•ด ๊ณต๊ฐœ๋œ ๊ณ„์ •๊ฐ„์— ๊ฑฐ๋ž˜๋ฅผ ํ•œ๋‹ค. ๋ชจ๋“  ๊ฑฐ๋ž˜๋Š” ๋น„๊ณต๊ฐœ์ ์ด๋‚˜ ๊ฑฐ๋ž˜์˜ ๊ธฐ๋ก์€ ๋‚จ์œผ๋ฉฐ,ย ๋ถ„์‚ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์— ์ €์žฅ๋œ๋‹ค.ย ๋ถ„์‚ฐ๋œ ์‹œ๊ฐ„์„œ๋ฒ„๋กœ ์ผ๋ จ์˜ย ์ž‘์—…์ฆ๋ช…(proof-of-work)์„ ํ•˜์—ฌ ์ค‘๋ณต์ง€์ถœ(double-spending)์„ ๋ฐฉ์ง€ํ•œ๋‹ค. ๊ฑฐ๋ž˜ ๊ธฐ๋ก์€ ๋ชจ๋‘ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์— ์ €์žฅ๋˜์–ด์•ผ ํ•œ๋‹ค. ์ €์žฅ์†Œ ํฌ๊ธฐ๋ฅผ ์ค„์ด๊ธฐ ์œ„ํ•ดย ๋จธํด ํŠธ๋ฆฌ(Merkle tree)๊ฐ€ ์‚ฌ์šฉ๋œ๋‹ค.'
    )   
)
# Text Input
context = st.text_area("Context.", value=context_option, height=300, on_change=None) # placeholder="Please input some context..",


if '์Šคํ‹ฐ๋ธ ํด ์Šคํ‹ฐ๋ธŒ ์žก์Šค' in context_option:
    question_option = st.selectbox('๐Ÿ’ก Select Question Examples.',
        (
            '์Šคํ‹ฐ๋ธŒ ์žก์Šค๊ฐ€ ๋ˆ„๊ตฌ์•ผ?', '์Šคํ‹ฐ๋ธŒ ์žก์Šค๋Š” ์• ํ”Œ๋กœ ๋Œ์•„์™€์„œ ์–ด๋–ป๊ฒŒ ํ–ˆ์–ด?', '์™œ ์• ํ”Œ์„ ๋‚˜์™”์–ด?', '์Šคํ‹ฐ๋ธŒ ์žก์Šค๋Š” ์–ด๋–ป๊ฒŒ ๋‹ค์‹œ ์• ํ”Œ๋กœ ๋Œ์•„์˜ค๊ฒŒ ๋˜์—ˆ์–ด?', 'ํ”ฝ์‚ฌ๋Š” ๋ญ˜ ์ œ์ž‘ํ–ˆ์–ด?', '์™œ  ํŒ€ ์ฟก์„ ์ƒˆ๋กœ์šด CEO๋กœ ๋งก์•˜์–ด?', '์Šคํ‹ฐ๋ธŒ ์žก์Šค๋Š” ์–ธ์ œ ์‚ฌ๋งํ–ˆ์–ด?'
        )
    )
elif '๋น„ํŠธ์ฝ”์ธ' in context_option:
    question_option = st.selectbox('๐Ÿ’ก Select Question Examples.',
        (
            '๋น„ํŠธ์ฝ”์ธ์€ ์–ด๋–ค ๊ตฌ์กฐ์•ผ?', '๋น„ํŠธ์ฝ”์ธ์€ ์–ด๋–ป๊ฒŒ ๊ฑฐ๋ž˜๊ฐ€ ๋ผ?', '๋น„ํŠธ์ฝ”์ธ ์ง€๊ฐ‘์—๋Š” ๋ญ๊ฐ€ ๋ถ€์—ฌ ๋ผ?', '๊ณต๊ฐœ๋œ ๊ณ„์ •๊ฐ„ ๊ฑฐ๋ž˜ ์‹œ ๋ญ˜ ์ด์šฉํ•ด?', '๋ชจ๋“  ๊ฑฐ๋ž˜๋Š” ์–ด๋–ป๊ฒŒ ๋‚จ์•„?', '๋จธํด ํŠธ๋ฆฌ๊ฐ€ ์™œ ์‚ฌ์šฉ ๋ผ?'
        )
    )

# Text Area
question = st.text_area("Question.", value=question_option, on_change=None) # placeholder="Please input your question.."



if st.button("Submit", key='question'):
    try:
        # Progress spinner
        with st.spinner('Wait for it...'):
            qa_text_pair = {'context':context, 'question':question}
            result = predict_answer(qa_text_pair)

            # answer
            st.success(result['answer_text'])

    except Exception as e:
        st.error(e)