abdulmatinomotoso committed on
Commit
ba997fd
1 Parent(s): d40b1ef

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #importing the necessary libraries
2
+ import gradio as gr
3
+ import numpy as np
4
+ import pandas as pd
5
+ import re
6
+ import torch
7
+
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+
10
# Define the model and tokenizer: both are loaded once, at import time, from
# the same pretrained checkpoint name so they stay in sync.
model_name = "valurank/distilroberta-topic-classification"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
15
+
16
+
17
def clean_text(raw_text):
    """Normalize raw article text before it is tokenized.

    The text is reduced to ASCII, flattened onto a single line, stripped of
    ``Date d/m/yyyy`` stamps and ``hh:mm XX XX`` time stamps, and finally has
    its whitespace collapsed so no double, leading, or trailing spaces remain.

    Args:
        raw_text: The text to clean.

    Returns:
        The cleaned, single-line string (possibly empty).
    """
    # Drop every character outside ASCII (e.g. Chinese characters).
    text = raw_text.encode("ascii", errors="ignore").decode("ascii")

    # Flatten newlines, tabs, carriage returns, etc. into single spaces first,
    # so the stamp patterns below (which expect one whitespace character
    # between their parts) can match text that was split across lines.
    text = re.sub(r"\s+", " ", text)

    text = re.sub(r"Date\s\d{1,2}\/\d{1,2}\/\d{4}", "", text)  # remove date
    text = re.sub(r"\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+", "", text)  # remove time

    # Collapse again *after* the removals: deleting a stamp leaves the spaces
    # that surrounded it, which would otherwise survive as a double space or
    # as leading/trailing whitespace.
    return re.sub(r" +", " ", text).strip()
34
+
35
+
36
def find_two_highest_indices(arr):
    """Return the positions of the largest and second-largest values in *arr*.

    Ties are resolved in favour of the earlier position, because a later
    element only displaces a current leader when it is strictly greater.

    Args:
        arr: An indexable, sized sequence of mutually comparable values.

    Returns:
        A ``(best, runner_up)`` tuple of indices into *arr*.

    Raises:
        ValueError: If *arr* holds fewer than two elements.
    """
    if len(arr) < 2:
        raise ValueError("Array must have at least two elements")

    # Seed the leader with the first position; there is no runner-up yet.
    positions = enumerate(arr)
    best, _ = next(positions)
    runner_up = None

    for idx, candidate in positions:
        if candidate > arr[best]:
            # New leader: the previous leader is demoted to runner-up.
            best, runner_up = idx, best
        elif runner_up is None or candidate > arr[runner_up]:
            runner_up = idx

    return best, runner_up
51
+
52
+
53
def predict_topic(text):
    """Classify *text* and return its two most probable topics.

    The text is cleaned with ``clean_text``, encoded with the module-level
    ``tokenizer`` (truncated to the model's maximum length), and scored by the
    module-level ``model``. Logits are turned into probabilities with a
    softmax over the class dimension.

    Args:
        text: The raw text to classify.

    Returns:
        A dict mapping the top two topic names to their probabilities rounded
        to two decimals, highest first.
    """
    text = clean_text(text)

    input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_tensor).logits

    # Batch size is 1, so take row 0 of the per-class probabilities.
    probs = torch.softmax(logits, dim=1)[0].cpu().numpy()

    # Topic names come from the model's own config; the file defines no
    # separate label list, so this is the only in-scope index -> name mapping.
    id2label = model.config.id2label

    top_indices = find_two_highest_indices(probs)

    # Convert numpy scalars to plain floats so the result is a plain
    # ``{str: float}`` mapping.
    return {id2label[idx]: round(float(probs[idx]), 2) for idx in top_indices}
73
+
74
+
75
# Create the interface for the Gradio app: a text box as input and a label
# widget showing the top two predicted topics as output.
demo = gr.Interface(
    fn=predict_topic,
    inputs=gr.Textbox(),
    outputs=gr.Label(num_top_classes=2),
    title="News Topic Classification",
)

if __name__ == "__main__":
    demo.launch(debug=True)