Spaces:

nktssk
/

itis

Running

App Files Files Community

nktssk committed 22 days ago

Commit

0cfb3b7

verified ·

1 Parent(s): fdc9f1c

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -30

app.py CHANGED Viewed

@@ -26,6 +26,43 @@ from transformers import CLIPProcessor, CLIPModel
 # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service_account.json"
 ########################################
 def clean_text(text):
     # Очистка от некоторых спецсимволов, ссылок, диакритики
     text = re.sub(r'МФА:?\s?\[.*?\]', '', text)
@@ -47,11 +84,14 @@ def clean_text(text):
 from num2words import num2words
-def number_to_russian_text(number):
-    try:
-        return num2words(number, lang='ru')
-    except NotImplementedError:
-        return "Ошибка: Не поддерживается преобразование для этого числа."
 summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
@@ -64,7 +104,7 @@ t2s_pipe = pipeline("text-to-speech", model="facebook/mms-tts-rus")
 translator = pipeline("translation_en_to_ru", model="Helsinki-NLP/opus-mt-en-ru")
 def text_to_speech(text, output_path="speech.wav"):
-  text = number_to_russian_text(text)
   model = VitsModel.from_pretrained("facebook/mms-tts-rus")
   tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
@@ -177,30 +217,6 @@ with gr.Blocks() as demo:
     gr.Markdown("## Две демки: Google Cloud Vision и CLIP (с переводом на русский)")
     with gr.Tabs():
-        with gr.Tab("CLIP + Sum + Translate + T2S"):
-            gr.Markdown("### Распознавание (CLIP) и перевод на русский")
-            with gr.Row():
-                image_input_c = gr.Image(label="Загрузите фото", type="pil")
-                text_input_c = gr.Textbox(label="Или введите название")
-            audio_output_c = gr.Audio(label="Результатт")
-            with gr.Row():
-                btn_recognize_c = gr.Button("Распознать и перевести на русский")
-                btn_text_c = gr.Button("Поиск по тексту")
-            btn_recognize_c.click(
-                fn=process_image_clip,
-                inputs=image_input_c,
-                outputs=audio_output_c
-            )
-            btn_text_c.click(
-                fn=process_text_clip,
-                inputs=text_input_c,
-                outputs=audio_output_c
-            )
         with gr.Tab("Google + Sum + T2S"):
             gr.Markdown("### Распознавание достопримечательности (Google)")
@@ -225,4 +241,29 @@ with gr.Blocks() as demo:
                 outputs=audio_output_g
             )
 demo.launch(debug=True)

 # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service_account.json"
 ########################################
+landmark_titles = [  # English names of landmarks; NOTE(review): consumer not visible in this diff - presumably CLIP candidate labels, confirm
+    "Eiffel Tower", "Taj Mahal", "Statue of Liberty", "Big Ben", "Golden Gate Bridge",
+    "Moscow International Business Center", "Colosseum", "Leaning Tower of Pisa",
+    "Great Wall of China", "Mount Rushmore", "Sydney Opera House", "Burj Khalifa",
+    "Hagia Sophia", "Tower of London", "Notre Dame Cathedral", "Angkor Wat",
+    "Petra", "Chichen Itza", "Machu Picchu", "Christ the Redeemer",
+    "Acropolis of Athens", "Pyramids of Giza", "Brandenburg Gate", "Sagrada Familia",
+    "Versailles Palace", "Santorini", "Niagara Falls", "Banff National Park",
+    "Yellowstone National Park", "Grand Canyon", "Victoria Falls", "Mount Everest",
+    "Blue Mosque", "Alhambra", "Forbidden City", "Buckingham Palace",
+    "Prague Castle", "St. Peter's Basilica", "Matterhorn", "Lake Bled",
+    "Stonehenge", "Uluru", "The Louvre", "Hollywood Sign", "Table Mountain",
+    "Pompeii", "Edinburgh Castle", "Cappadocia", "Florence Cathedral",
+    "Disneyland Paris", "Tokyo Tower", "Mount Fuji", "The Shard",
+    "Shwedagon Pagoda", "St. Basil's Cathedral", "Champs-Élysées",
+    "Red Square", "Kremlin", "Canals of Venice", "Rialto Bridge",
+    "Mont Saint-Michel", "Neuschwanstein Castle", "Schönbrunn Palace",
+    "Dubrovnik Old Town", "Lake Baikal", "Himeji Castle", "Berlin Wall",
+    "Mecca", "Medina", "Galápagos Islands", "Easter Island",
+    "Antelope Canyon", "Plitvice Lakes", "Cinque Terre", "Maldives",
+    "Vatican Museums", "Iguazu Falls", "Ha Long Bay", "Jiuzhaigou Valley",
+    "Mount Kilimanjaro", "Amazon Rainforest", "Santuario de las Lajas",
+    "Giant's Causeway", "Monument Valley", "Moai Statues",
+    "Torres del Paine", "Blue Lagoon", "Kruger National Park",
+    "White Cliffs of Dover", "Notre-Dame Basilica", "Hohenzollern Castle",
+    "Mont Blanc", "Seine River", "Amalfi Coast", "Lake Geneva",
+    "Jungfrau Region", "Marina Bay Sands", "Gardens by the Bay",
+    "Merlion Park", "Petronas Twin Towers", "Cebu Chocolate Hills",  # NOTE(review): the Chocolate Hills are usually placed in Bohol, not Cebu - confirm this label
+    "Borobudur Temple", "Kiyomizu-dera", "Nara Park", "Fushimi Inari Shrine",
+    "Sequoia National Park", "Yosemite National Park", "Haleakalā National Park",
+    "Pike Place Market", "Old Faithful", "Badlands National Park",
+    "Hoover Dam", "Bryce Canyon", "Rocky Mountain National Park",
+    "Arches National Park", "Great Barrier Reef", "Whitehaven Beach",
+    "Fraser Island", "Uluru-Kata Tjuta National Park", "Kangaroo Island",  # NOTE(review): overlaps with the separate "Uluru" entry earlier in the list
+    "Sydney Harbour Bridge", "Whitsunday Islands"
+]
 def clean_text(text):
     # Очистка от некоторых спецсимволов, ссылок, диакритики
     text = re.sub(r'МФА:?\s?\[.*?\]', '', text)
 from num2words import num2words
+def replace_numbers_with_text(input_string):
+    """Spell out every number found in ``input_string`` in Russian words.
+
+    Runs of digits, optionally followed by a dot and more digits, are
+    matched and passed to ``num2words`` with ``lang='ru'``: values that
+    contain a dot are converted as ``float``, all others as ``int``.
+    If the conversion raises, the matched digits are left unchanged.
+    Returns the string with all matches substituted.
+    """
+    number_pattern = r'\d+(\.\d+)?'
+
+    def _spell_out(found):
+        digits = found.group(0)
+        try:
+            if '.' in digits:
+                value = float(digits)
+            else:
+                value = int(digits)
+            return num2words(value, lang='ru')
+        except Exception:
+            # Best effort: keep the raw digits when conversion fails.
+            return digits
+
+    return re.sub(number_pattern, _spell_out, input_string)
 summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
 translator = pipeline("translation_en_to_ru", model="Helsinki-NLP/opus-mt-en-ru")
 def text_to_speech(text, output_path="speech.wav"):
+  text = replace_numbers_with_text(text)
   model = VitsModel.from_pretrained("facebook/mms-tts-rus")
   tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
     gr.Markdown("## Две демки: Google Cloud Vision и CLIP (с переводом на русский)")
     with gr.Tabs():
         with gr.Tab("Google + Sum + T2S"):
             gr.Markdown("### Распознавание достопримечательности (Google)")
                 outputs=audio_output_g
             )
+        # CLIP demo tab: takes a photo or a typed name and plays the result as audio.
+        with gr.Tab("CLIP + Sum + Translate + T2S"):
+            gr.Markdown("### Распознавание (CLIP) и перевод на русский")
+            # Two alternative inputs side by side: an uploaded photo (handed to
+            # the callback as a PIL image) or a landmark name typed as text.
+            with gr.Row():
+                image_input_c = gr.Image(label="Загрузите фото", type="pil")
+                text_input_c = gr.Textbox(label="Или введите название")
+            # Single audio player shared by both buttons below.
+            audio_output_c = gr.Audio(label="Результат")
+            with gr.Row():
+                btn_recognize_c = gr.Button("Распознать и перевести на русский")
+                btn_text_c = gr.Button("Поиск по тексту")
+            # Image path: the uploaded photo goes to process_image_clip.
+            btn_recognize_c.click(
+                fn=process_image_clip,
+                inputs=image_input_c,
+                outputs=audio_output_c
+            )
+            # Text path: the typed name goes to process_text_clip.
+            btn_text_c.click(
+                fn=process_text_clip,
+                inputs=text_input_c,
+                outputs=audio_output_c
+            )
 demo.launch(debug=True)