import gradio as gr from transformers import AutoTokenizer import pandas as pd import re from datetime import datetime from huggingface_hub import HfApi, DatasetCard, DatasetCardData, create_repo from gradio_huggingfacehub_search import HuggingfaceHubSearch import os import tempfile import re # --- Configuration --- HF_TOKEN = os.getenv("HF_TOKEN") DATASET_REPO_ID = os.getenv("DATASET_REPO", "Lyte/tokenizer-leaderboard") DATASET_FILE_NAME = "leaderboard.csv" PREDEFINED_TEXT = ''' import gradio as gr from transformers import AutoTokenizer import pandas as pd import re from datetime import datetime from huggingface_hub import HfApi, DatasetCard, DatasetCardData, create_repo from gradio_huggingfacehub_search import HuggingfaceHubSearch import os import tempfile # --- Configuration --- HF_TOKEN = os.getenv("HF_TOKEN") DATASET_REPO_ID = os.getenv("DATASET_REPO", "Lyte/tokenizer-leaderboard") DATASET_FILE_NAME = "leaderboard.csv" PREDEFINED_TEXT = """ The quick brown fox jumps over 12 lazy dogs! 🐕🦺 Special characters: #@%^&*()_+-=[]{}|;:'",.<>/?\\~ Code samples: - Python: def hello(): print("Hello World! 2023") - HTML: