Spaces:
Sleeping
Sleeping
Commit
ยท
cc8e236
1
Parent(s):
d5d353f
model upload
Browse files- .DS_Store +0 -0
- BOK_Q&A.py +71 -0
- data/qa_data.csv +0 -0
- requirements.txt +4 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
BOK_Q&A.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
|
6 |
+
# Table holding the predefined query-answer pairs
|
7 |
+
qna_df = pd.read_csv('./data/qa_data.csv')[['์ง๋ฌธ', '๋ต๋ณ']]
|
8 |
+
|
9 |
+
qna_df['์ง๋ฌธ'] = qna_df['์ง๋ฌธ'].apply(lambda x: x.split('์ง๋ฌธ\n')[1]) # "์ง๋ฌธ\n" ์ ๊ฑฐ
|
10 |
+
qna_df['๋ต๋ณ'] = qna_df['๋ต๋ณ'].apply(lambda x: x.split('๋ต๋ณ\n')[1]) # "๋ต๋ณ\n" ์ ๊ฑฐ
|
11 |
+
|
12 |
+
# Load the SentenceTransformer model
|
13 |
+
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')
|
14 |
+
|
15 |
+
# Generate embedding vectors for the stored query sentences
|
16 |
+
query_texts = qna_df['์ง๋ฌธ'].to_list()
|
17 |
+
query_embeddings = embedding_model.encode(query_texts)
|
18 |
+
|
19 |
+
# Define the query-answer function
|
20 |
+
def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
    """Return the stored question most similar to ``new_query`` and its answer.

    Args:
        new_query: Query text to match against the stored questions.
        embedding_model: Object whose ``encode`` method maps a list of texts
            to embedding vectors. Defaults to the module-level model.
        query_embeddings: Embeddings of the stored questions. They are
            expected to be aligned by position with ``query_texts`` and the
            rows of ``qna_df``, because the same index is used for all three.
        top_k: Currently unused; kept so existing callers keep working.
        verbose: When truthy, print the matched question, its similarity
            score and its answer.

    Returns:
        A ``(similar_query, similarity, similar_answer)`` tuple, where
        ``similarity`` is the cosine similarity rounded to two decimals.
    """
    # Embed the incoming query (a single-element batch).
    new_query_embedding = embedding_model.encode([new_query])

    # Cosine similarity of the one query against every stored question.
    # Take the single result row explicitly instead of relying on argmax
    # flattening a 2-D matrix.
    scores = cosine_similarity(new_query_embedding, query_embeddings)[0]

    # Index of the stored question with the highest similarity.
    most_similar_idx = int(np.argmax(scores))
    similarity = np.round(scores[most_similar_idx], 2)

    # Fetch the best-matching question and its answer.
    similar_query = query_texts[most_similar_idx]
    similar_answer = qna_df.iloc[most_similar_idx]['๋ต๋ณ']

    if verbose:
        print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ : ", similar_query)
        print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ์ ์ฌ๋ : ", similarity)
        print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ๋ต: ", similar_answer)

    # Return the result.
    return similar_query, similarity, similar_answer
|
43 |
+
|
44 |
+
import gradio as gr
|
45 |
+
|
46 |
+
# Function that answers a user question (uses qna_answer_to_query)
|
47 |
+
def chat_with(message, history):
    """Answer a user message and record the exchange in the chat history.

    Args:
        message: The text the user submitted.
        history: The conversation so far, as a list of (user, bot) pairs.
            ``None`` is treated as an empty conversation.

    Returns:
        A ``(history, "")`` pair. The submit handler in this file is wired
        with ``outputs=[chatbot, msg]``, so the first value refreshes the
        chatbot and the second clears the input textbox. Returning the
        history list twice would have written the list into the textbox.
    """
    # The chatbot value may not be initialised on the first turn.
    if history is None:
        history = []

    # qna_answer_to_query returns (question, similarity, answer); only the
    # answer is shown to the user.
    response = qna_answer_to_query(message)[2]

    # Store the question together with its answer in the conversation log.
    history.append((message, response))

    # Updated conversation for the chatbot, empty string for the textbox.
    return history, ""
|
56 |
+
|
57 |
+
# Create the Gradio Chatbot interface
|
58 |
+
with gr.Blocks() as demo:
|
59 |
+
chatbot = gr.Chatbot() # ๋ํ ๊ธฐ๋ก์ ํ์ํ๋ ์ปดํฌ๋ํธ
|
60 |
+
msg = gr.Textbox(label="์ง๋ฌธ ์
๋ ฅ") # ์ง๋ฌธ ์
๋ ฅ์ ์ํ ํ
์คํธ ๋ฐ์ค
|
61 |
+
clear = gr.Button("๋ํ ๊ธฐ๋ก ์ด๊ธฐํ") # ๋ํ ๊ธฐ๋ก ์ด๊ธฐํ ๋ฒํผ
|
62 |
+
|
63 |
+
# Define the action to run when the user submits a message
|
64 |
+
msg.submit(chat_with, inputs=[msg, chatbot], outputs=[chatbot, msg]) # ์
๋ ฅ๊ฐ์ ์ฒ๋ฆฌ ํ ์ถ๋ ฅ
|
65 |
+
|
66 |
+
# Define the behavior of the history-reset button
|
67 |
+
clear.click(lambda: [], None, chatbot, queue=False) # ๋ํ ๊ธฐ๋ก์ ์ด๊ธฐํ
|
68 |
+
|
69 |
+
# Launch the app
|
70 |
+
demo.launch(share=True)
|
71 |
+
|
data/qa_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
numpy
|
3 |
+
scikit-learn
|
4 |
+
sentence_transformers
|