File size: 1,981 Bytes
54b562a
b1d80a4
54b562a
 
09ded81
d25636a
09ded81
b1d80a4
 
d25636a
09ded81
54b562a
09ded81
2e8f79f
54b562a
09ded81
 
54b562a
 
 
09ded81
2e8f79f
09ded81
2e8f79f
 
09ded81
b1d80a4
09ded81
 
2e8f79f
 
09ded81
 
2e8f79f
54b562a
09ded81
b1d80a4
 
54b562a
09ded81
b1d80a4
54b562a
09ded81
b1d80a4
2e8f79f
b1d80a4
2e8f79f
09ded81
2e8f79f
b1d80a4
04a1c8b
b1d80a4
04a1c8b
b1d80a4
2e8f79f
b1d80a4
54b562a
b1d80a4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import streamlit as st
from transformers import pipeline
import pandas as pd

# Load the Question Answering model
@st.cache_resource
def load_qa_pipeline():
    """Build the question-answering pipeline for the app.

    The model weights and the tokenizer are both taken from the
    ``Rifky/Indobert-QA`` checkpoint.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"question-answering"`` task.
    """
    # One name for the checkpoint keeps model and tokenizer in sync.
    checkpoint = "Rifky/Indobert-QA"
    return pipeline(
        "question-answering",
        model=checkpoint,
        tokenizer=checkpoint,
    )

# Module-level QA pipeline instance; the answer-generation code further down
# calls it with the user's question and the selected context.
qa_pipeline = load_qa_pipeline()

# Load SOP Dataset
@st.cache_data
def load_sop_dataset():
    """Read the SOP dataset from ``dataset.csv``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    # Relative path: the file must be present in the working directory
    # (e.g. uploaded to your Hugging Face Space).
    csv_path = "dataset.csv"
    sop_frame = pd.read_csv(csv_path)
    return sop_frame

# Module-level SOP table; it is passed to find_best_context, which reads its
# 'text' column.
dataset = load_sop_dataset()

# Utility function to find the most relevant context
def find_best_context(question, dataset):
    """Find the single best context for a given question.

    Relevance is the number of distinct lowercase, whitespace-separated
    tokens shared between the question and a row's ``text`` value.

    Args:
        question: The user's question as a string.
        dataset: A DataFrame-like object with a ``text`` column.

    Returns:
        The ``text`` value with the highest token overlap (the earliest row
        wins ties), or ``None`` when no row shares at least one token with
        the question.

    Raises:
        KeyError: If ``dataset`` has no ``text`` column.
    """
    # Tokenize the question once; it does not change between rows.
    question_tokens = set(question.lower().split())
    best_score = 0
    best_context = None

    for context_text in dataset["text"]:
        # Empty CSV cells are read as NaN (a float), which has no .lower();
        # skip anything that is not text instead of crashing.
        if not isinstance(context_text, str):
            continue
        overlap = len(question_tokens & set(context_text.lower().split()))
        # Strict '>' keeps the first row among equally good matches and
        # requires at least one shared token.
        if overlap > best_score:
            best_score = overlap
            best_context = context_text

    return best_context

# Streamlit UI: page header, question input, and the answer flow that runs
# when the button is pressed.
st.title("Sistem Penjawab Pertanyaan SOP dengan IndoBERT")
st.markdown("Ajukan pertanyaan seputar Prosedur Operasional Standar:")

# User input
question = st.text_area("Masukkan pertanyaan Anda:", "")

# Generate answer
if st.button("Dapatkan Jawaban"):
    # Strip first so whitespace-only input gets the "please enter a question"
    # prompt instead of falling through to the "no relevant context" warning.
    cleaned_question = question.strip()
    if cleaned_question:
        # Step 1: pick the dataset text that shares the most words with the
        # question.
        with st.spinner("Menemukan konteks yang paling relevan..."):
            context = find_best_context(cleaned_question, dataset)

        if context:
            # Step 2: run the QA model on that context. The spinners are
            # sequential so each message is shown only during its own step.
            with st.spinner("Menjawab pertanyaan Anda..."):
                result = qa_pipeline(question=cleaned_question, context=context)
            st.success("Jawaban:")
            st.write(result["answer"])
            st.write("Skor Keyakinan:", result["score"])
        else:
            st.warning("Konteks yang relevan tidak ditemukan. Silakan coba pertanyaan lain.")
    else:
        st.warning("Silakan masukkan pertanyaan.")