Spaces:
Sleeping
Sleeping
roomnumber103
committed on
Commit
·
9484ade
1
Parent(s):
cc8e236
model upload
Browse files- BOK_Q&A.py +0 -71
- app.py +68 -4
- ref/app.py +7 -0
BOK_Q&A.py
DELETED
@@ -1,71 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import pandas as pd
|
3 |
-
from sentence_transformers import SentenceTransformer
|
4 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
-
|
6 |
-
# ์ฌ์ ์ ์๋ Query-Answer๊ฐ ๋ด๊ธด ํ
์ด๋ธ
|
7 |
-
qna_df = pd.read_csv('./data/qa_data.csv')[['์ง๋ฌธ', '๋ต๋ณ']]
|
8 |
-
|
9 |
-
qna_df['์ง๋ฌธ'] = qna_df['์ง๋ฌธ'].apply(lambda x: x.split('์ง๋ฌธ\n')[1]) # "์ง๋ฌธ\n" ์ ๊ฑฐ
|
10 |
-
qna_df['๋ต๋ณ'] = qna_df['๋ต๋ณ'].apply(lambda x: x.split('๋ต๋ณ\n')[1]) # "๋ต๋ณ\n" ์ ๊ฑฐ
|
11 |
-
|
12 |
-
# SentenceTransformer ๋ชจ๋ธ ๋ก๋
|
13 |
-
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')
|
14 |
-
|
15 |
-
# ์ฟผ๋ฆฌ ๋ฌธ์ฅ๋ค์ ๋ํ ์๋ฒ ๋ฉ ๋ฒกํฐ ์์ฑ
|
16 |
-
query_texts = qna_df['์ง๋ฌธ'].to_list()
|
17 |
-
query_embeddings = embedding_model.encode(query_texts)
|
18 |
-
|
19 |
-
# query-answer ํจ์ ์ ์
|
20 |
-
def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
|
21 |
-
# ์ฟผ๋ฆฌ ์๋ฒ ๋ฉ ๊ณ์ฐ
|
22 |
-
new_query_embedding = embedding_model.encode([new_query])
|
23 |
-
|
24 |
-
|
25 |
-
# ์ฝ์ฌ์ธ ์ ์ฌ๋ ๊ณ์ฐ
|
26 |
-
cos_sim = cosine_similarity(new_query_embedding, query_embeddings)
|
27 |
-
|
28 |
-
# ์ฝ์ฌ์ธ ์ ์ฌ๋ ๊ฐ์ด ๊ฐ์ฅ ํฐ ์ง๋ฌธ์ ์ธ๋ฑ์ค ์ฐพ๊ธฐ
|
29 |
-
most_similar_idx = np.argmax(cos_sim)
|
30 |
-
similarity = np.round(cos_sim[0][most_similar_idx], 2)
|
31 |
-
|
32 |
-
# ๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ๊ณผ ๋ต๋ณ ๊ฐ์ ธ์ค๊ธฐ
|
33 |
-
similar_query = query_texts[most_similar_idx]
|
34 |
-
similar_answer = qna_df.iloc[most_similar_idx]['๋ต๋ณ']
|
35 |
-
|
36 |
-
if verbose == True:
|
37 |
-
print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ : ", similar_query)
|
38 |
-
print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ์ ์ฌ๋ : ", similarity)
|
39 |
-
print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ๋ต: ", similar_answer)
|
40 |
-
|
41 |
-
# ๊ฒฐ๊ณผ ๋ฐํ
|
42 |
-
return similar_query, similarity, similar_answer
|
43 |
-
|
44 |
-
import gradio as gr
|
45 |
-
|
46 |
-
# ์ง๋ฌธ์ ๋ํ ๋ต๋ณ์ ์ ๊ณตํ๋ ํจ์ (qna_answer_to_query ํจ์ ์ฌ์ฉ)
|
47 |
-
def chat_with(message, history):
|
48 |
-
# ์ฌ์ฉ์์ ์ง๋ฌธ์ ๋ํด full_answer_to_query๋ฅผ ์ฌ์ฉํ์ฌ ๋ต๋ณ ์์ฑ
|
49 |
-
response = qna_answer_to_query(message)[2]
|
50 |
-
|
51 |
-
# ์ง๋ฌธ๊ณผ ๋ต๋ณ์ ํ์คํ ๋ฆฌ์ ์ ์ฅ (history๋ ๋ํ ํ์คํ ๋ฆฌ)
|
52 |
-
history.append((message, response))
|
53 |
-
|
54 |
-
# Gradio๊ฐ (์๋ต, history)๋ฅผ ๋ฐํํด์ผ ํ๋ฏ๋ก, ๋ํ ๊ธฐ๋ก๊ณผ ํจ๊ป ๋ฐํ
|
55 |
-
return history, history
|
56 |
-
|
57 |
-
# Gradio Chatbot ์ธํฐํ์ด์ค ์์ฑ
|
58 |
-
with gr.Blocks() as demo:
|
59 |
-
chatbot = gr.Chatbot() # ๋ํ ๊ธฐ๋ก์ ํ์ํ๋ ์ปดํฌ๋ํธ
|
60 |
-
msg = gr.Textbox(label="질문 입력") # 질문 입력을 위한 텍스트 박스
|
61 |
-
clear = gr.Button("๋ํ ๊ธฐ๋ก ์ด๊ธฐํ") # ๋ํ ๊ธฐ๋ก ์ด๊ธฐํ ๋ฒํผ
|
62 |
-
|
63 |
-
# ๋ํ๊ฐ ์์๋ ๋ ์คํํ ๋์ ์ ์
|
64 |
-
msg.submit(chat_with, inputs=[msg, chatbot], outputs=[chatbot, msg]) # 입력값을 처리 후 출력
|
65 |
-
|
66 |
-
# ๊ธฐ๋ก ์ด๊ธฐํ ๋ฒํผ ๋์ ์ ์
|
67 |
-
clear.click(lambda: [], None, chatbot, queue=False) # ๋ํ ๊ธฐ๋ก์ ์ด๊ธฐํ
|
68 |
-
|
69 |
-
# ์ฑ ์คํ
|
70 |
-
demo.launch(share=True)
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,7 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
7 |
-
demo.launch()
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
|
6 |
+
# ์ฌ์ ์ ์๋ Query-Answer๊ฐ ๋ด๊ธด ํ
์ด๋ธ
|
7 |
+
qna_df = pd.read_csv('./data/qa_data.csv')[['์ง๋ฌธ', '๋ต๋ณ']]
|
8 |
+
|
9 |
+
qna_df['์ง๋ฌธ'] = qna_df['์ง๋ฌธ'].apply(lambda x: x.split('์ง๋ฌธ\n')[1]) # "์ง๋ฌธ\n" ์ ๊ฑฐ
|
10 |
+
qna_df['๋ต๋ณ'] = qna_df['๋ต๋ณ'].apply(lambda x: x.split('๋ต๋ณ\n')[1]) # "๋ต๋ณ\n" ์ ๊ฑฐ
|
11 |
+
|
12 |
+
# SentenceTransformer ๋ชจ๋ธ ๋ก๋
|
13 |
+
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')
|
14 |
+
|
15 |
+
# ์ฟผ๋ฆฌ ๋ฌธ์ฅ๋ค์ ๋ํ ์๋ฒ ๋ฉ ๋ฒกํฐ ์์ฑ
|
16 |
+
query_texts = qna_df['์ง๋ฌธ'].to_list()
|
17 |
+
query_embeddings = embedding_model.encode(query_texts)
|
18 |
+
|
19 |
+
# query-answer ํจ์ ์ ์
|
20 |
+
def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
|
21 |
+
# ์ฟผ๋ฆฌ ์๋ฒ ๋ฉ ๊ณ์ฐ
|
22 |
+
new_query_embedding = embedding_model.encode([new_query])
|
23 |
+
|
24 |
+
|
25 |
+
# ์ฝ์ฌ์ธ ์ ์ฌ๋ ๊ณ์ฐ
|
26 |
+
cos_sim = cosine_similarity(new_query_embedding, query_embeddings)
|
27 |
+
|
28 |
+
# ์ฝ์ฌ์ธ ์ ์ฌ๋ ๊ฐ์ด ๊ฐ์ฅ ํฐ ์ง๋ฌธ์ ์ธ๋ฑ์ค ์ฐพ๊ธฐ
|
29 |
+
most_similar_idx = np.argmax(cos_sim)
|
30 |
+
similarity = np.round(cos_sim[0][most_similar_idx], 2)
|
31 |
+
|
32 |
+
# ๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ๊ณผ ๋ต๋ณ ๊ฐ์ ธ์ค๊ธฐ
|
33 |
+
similar_query = query_texts[most_similar_idx]
|
34 |
+
similar_answer = qna_df.iloc[most_similar_idx]['๋ต๋ณ']
|
35 |
+
|
36 |
+
if verbose == True:
|
37 |
+
print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ : ", similar_query)
|
38 |
+
print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ์ ์ฌ๋ : ", similarity)
|
39 |
+
print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ๋ต: ", similar_answer)
|
40 |
+
|
41 |
+
# ๊ฒฐ๊ณผ ๋ฐํ
|
42 |
+
return similar_query, similarity, similar_answer
|
43 |
+
|
44 |
import gradio as gr
|
45 |
|
46 |
+
# ์ง๋ฌธ์ ๋ํ ๋ต๋ณ์ ์ ๊ณตํ๋ ํจ์ (qna_answer_to_query ํจ์ ์ฌ์ฉ)
|
47 |
+
def chat_with(message, history):
|
48 |
+
# ์ฌ์ฉ์์ ์ง๋ฌธ์ ๋ํด full_answer_to_query๋ฅผ ์ฌ์ฉํ์ฌ ๋ต๋ณ ์์ฑ
|
49 |
+
response = qna_answer_to_query(message)[2]
|
50 |
+
|
51 |
+
# ์ง๋ฌธ๊ณผ ๋ต๋ณ์ ํ์คํ ๋ฆฌ์ ์ ์ฅ (history๋ ๋ํ ํ์คํ ๋ฆฌ)
|
52 |
+
history.append((message, response))
|
53 |
+
|
54 |
+
# Gradio๊ฐ (์๋ต, history)๋ฅผ ๋ฐํํด์ผ ํ๋ฏ๋ก, ๋ํ ๊ธฐ๋ก๊ณผ ํจ๊ป ๋ฐํ
|
55 |
+
return history, history
|
56 |
+
|
57 |
+
# Gradio Chatbot ์ธํฐํ์ด์ค ์์ฑ
|
58 |
+
with gr.Blocks() as demo:
|
59 |
+
chatbot = gr.Chatbot() # ๋ํ ๊ธฐ๋ก์ ํ์ํ๋ ์ปดํฌ๋ํธ
|
60 |
+
msg = gr.Textbox(label="질문 입력") # 질문 입력을 위한 텍스트 박스
|
61 |
+
clear = gr.Button("๋ํ ๊ธฐ๋ก ์ด๊ธฐํ") # ๋ํ ๊ธฐ๋ก ์ด๊ธฐํ ๋ฒํผ
|
62 |
+
|
63 |
+
# ๋ํ๊ฐ ์์๋ ๋ ์คํํ ๋์ ์ ์
|
64 |
+
msg.submit(chat_with, inputs=[msg, chatbot], outputs=[chatbot, msg]) # 입력값을 처리 후 출력
|
65 |
+
|
66 |
+
# ๊ธฐ๋ก ์ด๊ธฐํ ๋ฒํผ ๋์ ์ ์
|
67 |
+
clear.click(lambda: [], None, chatbot, queue=False) # ๋ํ ๊ธฐ๋ก์ ์ด๊ธฐํ
|
68 |
+
|
69 |
+
# ์ฑ ์คํ
|
70 |
+
demo.launch(share=True)
|
71 |
|
|
|
|
ref/app.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
def greet(name):
|
4 |
+
return "Hello " + name + "!!"
|
5 |
+
|
6 |
+
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
7 |
+
demo.launch()
|