Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,22 @@
|
|
1 |
import gradio as gr
|
2 |
from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
|
3 |
|
4 |
-
# Define the normalization parameters and their default values
|
5 |
-
normalization_params = {
|
6 |
-
'correct_spacing': True,
|
7 |
-
'remove_diacritics': True,
|
8 |
-
'remove_specials_chars': True,
|
9 |
-
'decrease_repeated_chars': True,
|
10 |
-
'persian_style': True,
|
11 |
-
'persian_numbers': True,
|
12 |
-
'unicodes_replacement': True,
|
13 |
-
'seperate_mi': True
|
14 |
-
}
|
15 |
-
|
16 |
# Initialize Hazm components
|
17 |
lemmatizer = Lemmatizer()
|
18 |
chunker = Chunker(model='resources/chunker.model')
|
19 |
|
20 |
-
def process_text(text, operations,
|
21 |
# Initialize the Normalizer with user-selected parameters
|
22 |
-
normalizer = Normalizer(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
result = {}
|
24 |
if 'normalize' in operations:
|
25 |
text = normalizer.normalize(text)
|
@@ -43,10 +40,14 @@ iface = gr.Interface(
|
|
43 |
inputs=[
|
44 |
gr.Textbox(lines=10, label="Input Text"),
|
45 |
gr.CheckboxGroup(operations, label="Operations"),
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
50 |
],
|
51 |
outputs="json",
|
52 |
title="Persian Text Processor with Hazm",
|
|
|
1 |
import gradio as gr
|
2 |
from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
# Initialize Hazm components
|
5 |
lemmatizer = Lemmatizer()
|
6 |
chunker = Chunker(model='resources/chunker.model')
|
7 |
|
8 |
+
def process_text(text, operations, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
|
9 |
# Initialize the Normalizer with user-selected parameters
|
10 |
+
normalizer = Normalizer(
|
11 |
+
correct_spacing=correct_spacing,
|
12 |
+
remove_diacritics=remove_diacritics,
|
13 |
+
remove_specials_chars=remove_specials_chars,
|
14 |
+
decrease_repeated_chars=decrease_repeated_chars,
|
15 |
+
persian_style=persian_style,
|
16 |
+
persian_numbers=persian_numbers,
|
17 |
+
unicodes_replacement=unicodes_replacement,
|
18 |
+
seperate_mi=seperate_mi
|
19 |
+
)
|
20 |
result = {}
|
21 |
if 'normalize' in operations:
|
22 |
text = normalizer.normalize(text)
|
|
|
40 |
inputs=[
|
41 |
gr.Textbox(lines=10, label="Input Text"),
|
42 |
gr.CheckboxGroup(operations, label="Operations"),
|
43 |
+
gr.Checkbox(value=True, label="Correct Spacing", interactive=True),
|
44 |
+
gr.Checkbox(value=True, label="Remove Diacritics", interactive=True),
|
45 |
+
gr.Checkbox(value=True, label="Remove Special Characters", interactive=True),
|
46 |
+
gr.Checkbox(value=True, label="Decrease Repeated Characters", interactive=True),
|
47 |
+
gr.Checkbox(value=True, label="Persian Style", interactive=True),
|
48 |
+
gr.Checkbox(value=True, label="Persian Numbers", interactive=True),
|
49 |
+
gr.Checkbox(value=True, label="Unicodes Replacement", interactive=True),
|
50 |
+
gr.Checkbox(value=True, label="Separate 'می'", interactive=True)
|
51 |
],
|
52 |
outputs="json",
|
53 |
title="Persian Text Processor with Hazm",
|