chatbot_pajak / app.py
chandra10's picture
Update Spaces
0121888
import nltk
import numpy as np
from nltk.stem.porter import PorterStemmer
import json
import torch
import torch.nn as nn
import random
import gradio as gr
nltk.download('punkt')
nltk.download('punkt_tab')
with open('dataset.json', 'r') as file:
dataset = json.load(file)
class NeuralNetwork(nn.Module):
def __init__(self, input_size, hidden_size, num_classes):
super(NeuralNetwork, self).__init__()
self.l1 = nn.Linear(input_size, hidden_size)
self.l2 = nn.Linear(hidden_size, hidden_size)
self.l3 = nn.Linear(hidden_size, num_classes)
self.relu = nn.ReLU()
def forward(self, x):
out = self.l1(x)
out = self.relu(out)
out = self.l2(out)
out = self.relu(out)
out = self.l3(out)
return out
data = torch.load('model_chatbot.pth', map_location=torch.device('cpu'))
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data["all_words"]
tags = data["tags"]
model_state = data["model_state"]
device = torch.device('cpu')
model = NeuralNetwork(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()
# Fungsi untuk tokenisasi, stemming, dan bag-of-words
stemmer = PorterStemmer()
def tokenize(sentence):
return nltk.word_tokenize(sentence)
def stem(word):
return stemmer.stem(word.lower())
def bag_of_words(tokenized_sentence, all_words):
tokenized_sentence = [stem(w) for w in tokenized_sentence]
bag = np.zeros(len(all_words), dtype=np.float32)
for idx, w in enumerate(all_words):
if w in tokenized_sentence:
bag[idx] = 1.0
return bag
# Fungsi prediksi
def predict(sentence):
sentence = tokenize(sentence)
X = bag_of_words(sentence, all_words)
X = X.reshape(1, X.shape[0])
X = torch.from_numpy(X).to(device)
output = model(X)
_, predicted = torch.max(output, dim=1)
tag = tags[predicted.item()]
probs = torch.softmax(output, dim=1)
prob = probs[0][predicted.item()]
if prob.item() > 0.75:
for intent in dataset['intents']:
if tag == intent["tag"]:
return random.choice(intent['response'])
else:
return "Saya tidak mengerti pertanyaan Anda."
# Buat interface Gradio
iface = gr.Interface(
fn=predict,
inputs=gr.Textbox(placeholder="Silahkan masukkan pertanyaan...."),
outputs="markdown",
title="Chatbot Hukum Pajak",
description="Selamat Datang di chatbot pajak.\n\nChatbot ini bisa hanya bisa menjawab beberapa pertanyaan dasar tentang hukum pajak dari pasal 21, pasal 22, pasal 23, dan pasal 24.\n\nContoh pertanyaan seperti berikut:\n\n1. Apa itu pajak?\n\n2. Apa itu pajak penghasilan?\n\n3. Bisakah kamu jelaskan apa itu pajak penghasilan berdasarkan pasal 22?\n\n4. dan lain-lain.\n\nJika ingin melihat bagaimana chatbot ini dilatih, kalian bisa melihat dataset sederhana yang saya buat:\n\nhttps://huggingface.co./chandra10/chatbot_pajak/resolve/main/dataset.json\n\nModel chatbot ini belum sempurna. Jadi, silahkan beri saran di community")
# Jalankan interface
iface.launch(share=True)