roomnumber103 committed on
Commit
9484ade
·
1 Parent(s): cc8e236

model upload

Browse files
Files changed (3) hide show
  1. BOK_Q&A.py +0 -71
  2. app.py +68 -4
  3. ref/app.py +7 -0
BOK_Q&A.py DELETED
@@ -1,71 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- from sentence_transformers import SentenceTransformer
4
- from sklearn.metrics.pairwise import cosine_similarity
5
-
6
- # ์‚ฌ์ „์ •์˜๋œ Query-Answer๊ฐ€ ๋‹ด๊ธด ํ…Œ์ด๋ธ”
7
- qna_df = pd.read_csv('./data/qa_data.csv')[['์งˆ๋ฌธ', '๋‹ต๋ณ€']]
8
-
9
- qna_df['์งˆ๋ฌธ'] = qna_df['์งˆ๋ฌธ'].apply(lambda x: x.split('์งˆ๋ฌธ\n')[1]) # "์งˆ๋ฌธ\n" ์ œ๊ฑฐ
10
- qna_df['๋‹ต๋ณ€'] = qna_df['๋‹ต๋ณ€'].apply(lambda x: x.split('๋‹ต๋ณ€\n')[1]) # "๋‹ต๋ณ€\n" ์ œ๊ฑฐ
11
-
12
- # SentenceTransformer ๋ชจ๋ธ ๋กœ๋“œ
13
- embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')
14
-
15
- # ์ฟผ๋ฆฌ ๋ฌธ์žฅ๋“ค์— ๋Œ€ํ•œ ์ž„๋ฒ ๋”ฉ ๋ฒกํ„ฐ ์ƒ์„ฑ
16
- query_texts = qna_df['์งˆ๋ฌธ'].to_list()
17
- query_embeddings = embedding_model.encode(query_texts)
18
-
19
- # query-answer ํ•จ์ˆ˜ ์ •์˜
20
- def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
21
- # ์ฟผ๋ฆฌ ์ž„๋ฒ ๋”ฉ ๊ณ„์‚ฐ
22
- new_query_embedding = embedding_model.encode([new_query])
23
-
24
-
25
- # ์ฝ”์‚ฌ์ธ ์œ ์‚ฌ๋„ ๊ณ„์‚ฐ
26
- cos_sim = cosine_similarity(new_query_embedding, query_embeddings)
27
-
28
- # ์ฝ”์‚ฌ์ธ ์œ ์‚ฌ๋„ ๊ฐ’์ด ๊ฐ€์žฅ ํฐ ์งˆ๋ฌธ์˜ ์ธ๋ฑ์Šค ์ฐพ๊ธฐ
29
- most_similar_idx = np.argmax(cos_sim)
30
- similarity = np.round(cos_sim[0][most_similar_idx], 2)
31
-
32
- # ๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ๊ณผ ๋‹ต๋ณ€ ๊ฐ€์ ธ์˜ค๊ธฐ
33
- similar_query = query_texts[most_similar_idx]
34
- similar_answer = qna_df.iloc[most_similar_idx]['๋‹ต๋ณ€']
35
-
36
- if verbose == True:
37
- print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ : ", similar_query)
38
- print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ์œ ์‚ฌ๋„ : ", similarity)
39
- print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ๋‹ต: ", similar_answer)
40
-
41
- # ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜
42
- return similar_query, similarity, similar_answer
43
-
44
- import gradio as gr
45
-
46
- # ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ œ๊ณตํ•˜๋Š” ํ•จ์ˆ˜ (qna_answer_to_query ํ•จ์ˆ˜ ์‚ฌ์šฉ)
47
- def chat_with(message, history):
48
- # ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ๋Œ€ํ•ด full_answer_to_query๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ ์ƒ์„ฑ
49
- response = qna_answer_to_query(message)[2]
50
-
51
- # ์งˆ๋ฌธ๊ณผ ๋‹ต๋ณ€์„ ํžˆ์Šคํ† ๋ฆฌ์— ์ €์žฅ (history๋Š” ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ)
52
- history.append((message, response))
53
-
54
- # Gradio๊ฐ€ (์‘๋‹ต, history)๋ฅผ ๋ฐ˜ํ™˜ํ•ด์•ผ ํ•˜๋ฏ€๋กœ, ๋Œ€ํ™” ๊ธฐ๋ก๊ณผ ํ•จ๊ป˜ ๋ฐ˜ํ™˜
55
- return history, history
56
-
57
- # Gradio Chatbot ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
58
- with gr.Blocks() as demo:
59
- chatbot = gr.Chatbot() # ๋Œ€ํ™” ๊ธฐ๋ก์„ ํ‘œ์‹œํ•˜๋Š” ์ปดํฌ๋„ŒํŠธ
60
- msg = gr.Textbox(label="์งˆ๋ฌธ ์ž…๋ ฅ") # ์งˆ๋ฌธ ์ž…๋ ฅ์„ ์œ„ํ•œ ํ…์ŠคํŠธ ๋ฐ•์Šค
61
- clear = gr.Button("๋Œ€ํ™” ๊ธฐ๋ก ์ดˆ๊ธฐํ™”") # ๋Œ€ํ™” ๊ธฐ๋ก ์ดˆ๊ธฐํ™” ๋ฒ„ํŠผ
62
-
63
- # ๋Œ€ํ™”๊ฐ€ ์‹œ์ž‘๋  ๋•Œ ์‹คํ–‰ํ•  ๋™์ž‘ ์ •์˜
64
- msg.submit(chat_with, inputs=[msg, chatbot], outputs=[chatbot, msg]) # ์ž…๋ ฅ๊ฐ’์„ ์ฒ˜๋ฆฌ ํ›„ ์ถœ๋ ฅ
65
-
66
- # ๊ธฐ๋ก ์ดˆ๊ธฐํ™” ๋ฒ„ํŠผ ๋™์ž‘ ์ •์˜
67
- clear.click(lambda: [], None, chatbot, queue=False) # ๋Œ€ํ™” ๊ธฐ๋ก์„ ์ดˆ๊ธฐํ™”
68
-
69
- # ์•ฑ ์‹คํ–‰
70
- demo.launch(share=True)
71
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,7 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+
6
+ # ์‚ฌ์ „์ •์˜๋œ Query-Answer๊ฐ€ ๋‹ด๊ธด ํ…Œ์ด๋ธ”
7
# Load the predefined question/answer table, keeping only the two columns
# that the rest of the script reads.
qna_df = pd.read_csv('./data/qa_data.csv')[['์งˆ๋ฌธ', '๋‹ต๋ณ€']]


def _text_after(header):
    """Return a function that maps a cell to the text following *header*.

    The returned function splits the cell on *header* and keeps element [1],
    so it raises IndexError for a cell that does not contain *header*.
    """
    return lambda x: x.split(header)[1]


# Drop the header label (and its newline) from every question and answer cell.
# NOTE(review): presumably every cell begins with its header label - confirm
# against the CSV.
qna_df['์งˆ๋ฌธ'] = qna_df['์งˆ๋ฌธ'].apply(_text_after('์งˆ๋ฌธ\n'))
qna_df['๋‹ต๋ณ€'] = qna_df['๋‹ต๋ณ€'].apply(_text_after('๋‹ต๋ณ€\n'))

# Load the SentenceTransformer embedding model.
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')

# Embed every stored question once at start-up so that answering a query only
# requires embedding the new query text.
query_texts = qna_df['์งˆ๋ฌธ'].to_list()
query_embeddings = embedding_model.encode(query_texts)
18
+
19
+ # query-answer ํ•จ์ˆ˜ ์ •์˜
20
def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
    """Return the stored question most similar to ``new_query`` and its answer.

    Args:
        new_query: Text of the user's question.
        embedding_model: Object whose ``encode`` method turns a list of texts
            into embedding vectors. Defaults to the module-level model.
        query_embeddings: Embeddings of the stored questions, one row per
            question. Defaults to the module-level embeddings. The matching
            question/answer text is always read from the module-level
            ``query_texts`` and ``qna_df``, so custom embeddings must be in
            the same row order.
        top_k: Accepted for interface compatibility but currently unused;
            only the single best match is returned.
        verbose: When truthy, print the matched question, its similarity and
            its answer.

    Returns:
        A ``(similar_query, similarity, similar_answer)`` tuple, where
        ``similarity`` is the cosine similarity rounded to two decimals.
    """
    # encode() receives a one-element list, so the result holds one query row.
    new_query_embedding = embedding_model.encode([new_query])

    # Only one query was embedded, so take row 0 explicitly rather than
    # relying on argmax flattening the 2-D similarity matrix.
    similarities = cosine_similarity(new_query_embedding, query_embeddings)[0]

    # Index of the stored question with the highest cosine similarity.
    most_similar_idx = int(np.argmax(similarities))
    similarity = np.round(similarities[most_similar_idx], 2)

    # Look up the matched question and its answer by the same row position.
    similar_query = query_texts[most_similar_idx]
    similar_answer = qna_df.iloc[most_similar_idx]['๋‹ต๋ณ€']

    if verbose:
        print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ : ", similar_query)
        print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ์œ ์‚ฌ๋„ : ", similarity)
        print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ๋‹ต: ", similar_answer)

    return similar_query, similarity, similar_answer
43
+
44
  import gradio as gr
45
 
46
+ # ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ œ๊ณตํ•˜๋Š” ํ•จ์ˆ˜ (qna_answer_to_query ํ•จ์ˆ˜ ์‚ฌ์šฉ)
47
def chat_with(message, history):
    """Answer ``message`` and append the exchange to the chat history.

    Args:
        message: The question text submitted by the user.
        history: List of ``(user_message, bot_response)`` tuples shown so
            far; ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple. The submit handler routes the first value
        to the chat display and the second to the input textbox, so the empty
        string clears the box instead of writing the history list into it.
    """
    if history is None:
        history = []

    # Element [2] of the lookup result is the answer of the best-matching
    # stored question.
    response = qna_answer_to_query(message)[2]

    # Record the question/answer pair in the conversation history.
    history.append((message, response))

    return history, ""
56
+
57
+ # Gradio Chatbot ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
58
# Build the Gradio chat UI: a chat display, a question box and a reset button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()  # shows the conversation so far
    msg = gr.Textbox(label="์งˆ๋ฌธ ์ž…๋ ฅ")  # question input box
    clear = gr.Button("๋Œ€ํ™” ๊ธฐ๋ก ์ดˆ๊ธฐํ™”")  # resets the conversation

    # On submit, pass the typed text and the current history to chat_with;
    # its two return values go to the chat display and the textbox.
    msg.submit(
        fn=chat_with,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg],
    )

    # The reset button replaces the chat history with an empty list.
    clear.click(fn=lambda: [], inputs=None, outputs=chatbot, queue=False)

# Start the app; share=True asks Gradio for a public share link.
demo.launch(share=True)
71
 
 
 
ref/app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
def greet(name):
    """Return the greeting "Hello <name>!!" for the given name string."""
    pieces = ["Hello ", name, "!!"]
    return "".join(pieces)
5
+
6
# Wrap greet in a minimal text-in / text-out Gradio interface and serve it.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
demo.launch()