AlirezaF138 committed on
Commit
5c9ffe3
·
verified ·
1 Parent(s): 33901fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -1,25 +1,22 @@
1
  import gradio as gr
2
  from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
3
 
4
- # Define the normalization parameters and their default values
5
- normalization_params = {
6
- 'correct_spacing': True,
7
- 'remove_diacritics': True,
8
- 'remove_specials_chars': True,
9
- 'decrease_repeated_chars': True,
10
- 'persian_style': True,
11
- 'persian_numbers': True,
12
- 'unicodes_replacement': True,
13
- 'seperate_mi': True
14
- }
15
-
16
  # Initialize Hazm components
17
  lemmatizer = Lemmatizer()
18
  chunker = Chunker(model='resources/chunker.model')
19
 
20
- def process_text(text, operations, **kwargs):
21
  # Initialize the Normalizer with user-selected parameters
22
- normalizer = Normalizer(**kwargs)
 
 
 
 
 
 
 
 
 
23
  result = {}
24
  if 'normalize' in operations:
25
  text = normalizer.normalize(text)
@@ -43,10 +40,14 @@ iface = gr.Interface(
43
  inputs=[
44
  gr.Textbox(lines=10, label="Input Text"),
45
  gr.CheckboxGroup(operations, label="Operations"),
46
- *[
47
- gr.Checkbox(value=default, label=param.replace('_', ' ').capitalize())
48
- for param, default in normalization_params.items()
49
- ]
 
 
 
 
50
  ],
51
  outputs="json",
52
  title="Persian Text Processor with Hazm",
 
1
  import gradio as gr
2
  from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
3
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  # Initialize Hazm components
5
  lemmatizer = Lemmatizer()
6
  chunker = Chunker(model='resources/chunker.model')
7
 
8
+ def process_text(text, operations, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
9
  # Initialize the Normalizer with user-selected parameters
10
+ normalizer = Normalizer(
11
+ correct_spacing=correct_spacing,
12
+ remove_diacritics=remove_diacritics,
13
+ remove_specials_chars=remove_specials_chars,
14
+ decrease_repeated_chars=decrease_repeated_chars,
15
+ persian_style=persian_style,
16
+ persian_numbers=persian_numbers,
17
+ unicodes_replacement=unicodes_replacement,
18
+ seperate_mi=seperate_mi
19
+ )
20
  result = {}
21
  if 'normalize' in operations:
22
  text = normalizer.normalize(text)
 
40
  inputs=[
41
  gr.Textbox(lines=10, label="Input Text"),
42
  gr.CheckboxGroup(operations, label="Operations"),
43
+ gr.Checkbox(value=True, label="Correct Spacing", interactive=True),
44
+ gr.Checkbox(value=True, label="Remove Diacritics", interactive=True),
45
+ gr.Checkbox(value=True, label="Remove Special Characters", interactive=True),
46
+ gr.Checkbox(value=True, label="Decrease Repeated Characters", interactive=True),
47
+ gr.Checkbox(value=True, label="Persian Style", interactive=True),
48
+ gr.Checkbox(value=True, label="Persian Numbers", interactive=True),
49
+ gr.Checkbox(value=True, label="Unicodes Replacement", interactive=True),
50
+ gr.Checkbox(value=True, label="Separate 'می'", interactive=True)
51
  ],
52
  outputs="json",
53
  title="Persian Text Processor with Hazm",