Spaces:
Sleeping
Sleeping
File size: 2,757 Bytes
d14409f 9eda247 d14409f 3037d70 33901fb 5c9ffe3 3037d70 d14409f 3037d70 d14409f 3037d70 9eda247 d14409f 3037d70 d14409f 33901fb 9eda247 d14409f 0b65d75 3037d70 ea34410 d14409f a1b3563 d14409f 3037d70 d14409f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import gradio as gr
from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
# Shared Hazm components, built once at import time and reused by every request.
CHUNKER_MODEL_PATH = "resources/chunker.model"

lemmatizer = Lemmatizer()
chunker = Chunker(model=CHUNKER_MODEL_PATH)
def process_text(text, operation, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
    """Run a single Hazm operation on ``text`` and return the result as a string.

    Args:
        text: Input text to process.
        operation: One of ``"normalize"``, ``"tokenize"``, ``"lemmatize"`` or
            ``"chunk"``. Any other value produces an empty string.
        correct_spacing, remove_diacritics, remove_specials_chars,
        decrease_repeated_chars, persian_style, persian_numbers,
        unicodes_replacement, seperate_mi: Flags forwarded unchanged to
            ``Normalizer``. They only affect the ``"normalize"`` operation.

    Returns:
        The processed text. Tokens and lemmas are joined with single spaces;
        chunks are rendered with ``str()``. Empty input or an unrecognised
        operation yields ``""``.
    """
    # Nothing to process: skip all Hazm work.
    if not text:
        return ""

    if operation == "normalize":
        # Build the Normalizer only here, since no other operation uses it.
        normalizer = Normalizer(
            correct_spacing=correct_spacing,
            remove_diacritics=remove_diacritics,
            remove_specials_chars=remove_specials_chars,
            decrease_repeated_chars=decrease_repeated_chars,
            persian_style=persian_style,
            persian_numbers=persian_numbers,
            unicodes_replacement=unicodes_replacement,
            seperate_mi=seperate_mi,
        )
        return normalizer.normalize(text)

    if operation == "tokenize":
        # Show tokens as a space-separated string.
        return " ".join(word_tokenize(text))

    if operation == "lemmatize":
        # Show lemmas as a space-separated string.
        return " ".join(lemmatizer.lemmatize(token) for token in word_tokenize(text))

    if operation == "chunk":
        # NOTE(review): these are plain tokens, not POS-tagged (word, tag) pairs.
        # Hazm's Chunker.parse is documented to take POS-tagged input, so this
        # branch presumably needs a POSTagger step first -- confirm which tagger
        # model ships with this app before wiring it in.
        tokens = word_tokenize(text)
        return str(chunker.parse(tokens))

    # Unknown or missing operation.
    return ""
# Gradio UI: one text input, an operation selector and the normalizer switches.
operations = ['normalize', 'tokenize', 'lemmatize', 'chunk']

# Checkbox labels, listed in the same order as the boolean parameters of
# process_text, because Gradio passes input values to the function positionally.
_normalizer_option_labels = (
    "Correct Spacing",
    "Remove Diacritics",
    "Remove Special Characters",
    "Decrease Repeated Characters",
    "Persian Style",
    "Persian Numbers",
    "Unicodes Replacement",
    "Separate 'می'",
)

_input_components = [
    gr.Textbox(lines=10, label="Input Text"),
    # Radio button so exactly one operation is selected at a time.
    gr.Radio(operations, label="Select Operation", type="value"),
]
_input_components.extend(
    gr.Checkbox(value=True, label=option_label, interactive=True)
    for option_label in _normalizer_option_labels
)

# Read-only output box with a copy button.
_output_component = gr.Textbox(
    label="Processed Text",
    interactive=False,
    lines=10,
    show_copy_button=True,
    show_label=True,
)

iface = gr.Interface(
    fn=process_text,
    inputs=_input_components,
    outputs=_output_component,
    title="Persian Text Processor with Hazm",
    description="Select an operation and normalization parameters to process the input text using Hazm.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|