|
import streamlit as st |
|
import numpy as np |
|
import faiss |
|
import pickle |
|
from sentence_transformers import SentenceTransformer |
|
import pandas as pd |
|
|
|
|
|
st.title('직장 괴롭힘 참고 사례 검색과 판단')


def _l2_normalize(matrix):
    """Return a float32, C-contiguous copy of *matrix* with unit-length rows.

    With unit-length rows an inner-product search is equivalent to a cosine
    similarity search. Rows whose norm is zero are left as all-zero vectors
    instead of turning into NaN.
    """
    matrix = np.asarray(matrix, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # Clamp the divisor so a zero vector yields 0 / tiny == 0 rather than NaN.
    norms = np.maximum(norms, np.finfo(np.float32).tiny)
    # FAISS only accepts float32, C-contiguous input.
    return np.ascontiguousarray(matrix / norms, dtype=np.float32)


@st.cache_resource
def _load_search_resources():
    """Load the encoder, the case table and the FAISS index exactly once.

    Streamlit re-executes the whole script on every widget interaction;
    caching keeps the model download/initialisation, the Excel parse and the
    index build from being repeated on each rerun.

    Returns:
        A tuple ``(model, df, embeddings_norm, index)``: the sentence encoder,
        the case DataFrame, the row-normalised embedding matrix and an
        inner-product FAISS index built from that matrix.
    """
    encoder = SentenceTransformer('bongsoo/kpf-sbert-v1')
    cases = pd.read_excel('직장괴롭힘_071424.xlsx')

    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
    # Only ship an embeddings file produced by a trusted pipeline.
    with open('embeddingsNorm_직장괴롭힘071424_kpfsbert.pkl', 'rb') as f:
        raw_embeddings = pickle.load(f)

    # Re-normalising already-normalised vectors is harmless, and it protects
    # against a file that was saved without normalisation.
    normalized = _l2_normalize(raw_embeddings)

    # NOTE(review): row i of the embeddings is assumed to describe row i of
    # the Excel sheet; nothing here verifies that the two stay in sync.
    flat_index = faiss.IndexFlatIP(normalized.shape[1])
    flat_index.add(normalized)
    return encoder, cases, normalized, flat_index


model, df, embeddings_norm, index = _load_search_resources()

# Embedding dimensionality, kept as a module-level name as before.
d = embeddings_norm.shape[1]
|
|
|
|
|
query_sentence = st.text_area('검색할 문장을 입력하세요', '''예시: 회사 과장님이 회의에서 "조용히 있으라"며 야단치고, 동료들도 회식 등 모임에서 따돌림합니다''')

if st.button('분석'):
    # Never request more neighbours than the index holds: FAISS pads missing
    # results with label -1, which DataFrame.iloc would read as "last row".
    k = min(5, index.ntotal)

    if not query_sentence.strip():
        # A blank query has no meaningful embedding; ask for input instead.
        st.warning('검색할 문장을 입력하세요')
    elif k == 0:
        # An empty index cannot be searched (FAISS requires k > 0).
        st.warning('검색 가능한 사례 데이터가 없습니다.')
    else:
        # Encode the query and force float32, the only dtype FAISS accepts.
        query_embedding = np.asarray(model.encode([query_sentence]), dtype=np.float32)

        # L2-normalise so the inner product returned by the index is the
        # cosine similarity; clamp the norm to avoid dividing by zero.
        query_norm = np.linalg.norm(query_embedding, axis=1, keepdims=True)
        query_norm = np.maximum(query_norm, np.finfo(np.float32).tiny)
        query_embedding_norm = np.ascontiguousarray(query_embedding / query_norm)

        distances, indices = index.search(query_embedding_norm, k)

        # Keep only labels that point at a real DataFrame row.
        labels = indices[0]
        valid = (labels >= 0) & (labels < len(df))
        cosine_similarities = distances[0][valid]

        # Attach the similarity score to the matching case rows.
        df_result = df.iloc[labels[valid]].copy()
        df_result['유사도'] = cosine_similarities

        # Most similar case first.
        df_result = df_result.sort_values(by='유사도', ascending=False)

        st.write('분석 결과:')
        st.write(df_result)