nktssk committed on
Commit
0cfb3b7
·
verified ·
1 Parent(s): fdc9f1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -30
app.py CHANGED
@@ -26,6 +26,43 @@ from transformers import CLIPProcessor, CLIPModel
26
  # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service_account.json"
27
  ########################################
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def clean_text(text):
30
  # Очистка от некоторых спецсимволов, ссылок, диакритики
31
  text = re.sub(r'МФА:?\s?\[.*?\]', '', text)
@@ -47,11 +84,14 @@ def clean_text(text):
47
 
48
  from num2words import num2words
49
 
50
- def number_to_russian_text(number):
51
- try:
52
- return num2words(number, lang='ru')
53
- except NotImplementedError:
54
- return "Ошибка: Не поддерживается преобразование для этого числа."
 
 
 
55
 
56
  summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
57
 
@@ -64,7 +104,7 @@ t2s_pipe = pipeline("text-to-speech", model="facebook/mms-tts-rus")
64
  translator = pipeline("translation_en_to_ru", model="Helsinki-NLP/opus-mt-en-ru")
65
 
66
  def text_to_speech(text, output_path="speech.wav"):
67
- text = number_to_russian_text(text)
68
  model = VitsModel.from_pretrained("facebook/mms-tts-rus")
69
  tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
70
 
@@ -177,30 +217,6 @@ with gr.Blocks() as demo:
177
  gr.Markdown("## Две демки: Google Cloud Vision и CLIP (с переводом на русский)")
178
 
179
  with gr.Tabs():
180
- with gr.Tab("CLIP + Sum + Translate + T2S"):
181
- gr.Markdown("### Распознавание (CLIP) и перевод на русский")
182
-
183
- with gr.Row():
184
- image_input_c = gr.Image(label="Загрузите фото", type="pil")
185
- text_input_c = gr.Textbox(label="Или введите название")
186
-
187
- audio_output_c = gr.Audio(label="Результатт")
188
-
189
- with gr.Row():
190
- btn_recognize_c = gr.Button("Распознать и перевести на русский")
191
- btn_text_c = gr.Button("Поиск по тексту")
192
-
193
- btn_recognize_c.click(
194
- fn=process_image_clip,
195
- inputs=image_input_c,
196
- outputs=audio_output_c
197
- )
198
- btn_text_c.click(
199
- fn=process_text_clip,
200
- inputs=text_input_c,
201
- outputs=audio_output_c
202
- )
203
-
204
  with gr.Tab("Google + Sum + T2S"):
205
  gr.Markdown("### Распознавание достопримечательности (Google)")
206
 
@@ -225,4 +241,29 @@ with gr.Blocks() as demo:
225
  outputs=audio_output_g
226
  )
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  demo.launch(debug=True)
 
 
26
  # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service_account.json"
27
  ########################################
28
 
29
+ landmark_titles = [
30
+ "Eiffel Tower", "Taj Mahal", "Statue of Liberty", "Big Ben", "Golden Gate Bridge",
31
+ "Moscow International Business Center", "Colosseum", "Leaning Tower of Pisa",
32
+ "Great Wall of China", "Mount Rushmore", "Sydney Opera House", "Burj Khalifa",
33
+ "Hagia Sophia", "Tower of London", "Notre Dame Cathedral", "Angkor Wat",
34
+ "Petra", "Chichen Itza", "Machu Picchu", "Christ the Redeemer",
35
+ "Acropolis of Athens", "Pyramids of Giza", "Brandenburg Gate", "Sagrada Familia",
36
+ "Versailles Palace", "Santorini", "Niagara Falls", "Banff National Park",
37
+ "Yellowstone National Park", "Grand Canyon", "Victoria Falls", "Mount Everest",
38
+ "Blue Mosque", "Alhambra", "Forbidden City", "Buckingham Palace",
39
+ "Prague Castle", "St. Peter's Basilica", "Matterhorn", "Lake Bled",
40
+ "Stonehenge", "Uluru", "The Louvre", "Hollywood Sign", "Table Mountain",
41
+ "Pompeii", "Edinburgh Castle", "Cappadocia", "Florence Cathedral",
42
+ "Disneyland Paris", "Tokyo Tower", "Mount Fuji", "The Shard",
43
+ "Shwedagon Pagoda", "St. Basil's Cathedral", "Champs-Élysées",
44
+ "Red Square", "Kremlin", "Canals of Venice", "Rialto Bridge",
45
+ "Mont Saint-Michel", "Neuschwanstein Castle", "Schönbrunn Palace",
46
+ "Dubrovnik Old Town", "Lake Baikal", "Himeji Castle", "Berlin Wall",
47
+ "Mecca", "Medina", "Galápagos Islands", "Easter Island",
48
+ "Antelope Canyon", "Plitvice Lakes", "Cinque Terre", "Maldives",
49
+ "Vatican Museums", "Iguazu Falls", "Ha Long Bay", "Jiuzhaigou Valley",
50
+ "Mount Kilimanjaro", "Amazon Rainforest", "Santuario de las Lajas",
51
+ "Giant's Causeway", "Monument Valley", "Moai Statues",
52
+ "Torres del Paine", "Blue Lagoon", "Kruger National Park",
53
+ "White Cliffs of Dover", "Notre-Dame Basilica", "Hohenzollern Castle",
54
+ "Mont Blanc", "Seine River", "Amalfi Coast", "Lake Geneva",
55
+ "Jungfrau Region", "Marina Bay Sands", "Gardens by the Bay",
56
+ "Merlion Park", "Petronas Twin Towers", "Cebu Chocolate Hills",
57
+ "Borobudur Temple", "Kiyomizu-dera", "Nara Park", "Fushimi Inari Shrine",
58
+ "Sequoia National Park", "Yosemite National Park", "Haleakalā National Park",
59
+ "Pike Place Market", "Old Faithful", "Badlands National Park",
60
+ "Hoover Dam", "Bryce Canyon", "Rocky Mountain National Park",
61
+ "Arches National Park", "Great Barrier Reef", "Whitehaven Beach",
62
+ "Fraser Island", "Uluru-Kata Tjuta National Park", "Kangaroo Island",
63
+ "Sydney Harbour Bridge", "Whitsunday Islands"
64
+ ]
65
+
66
  def clean_text(text):
67
  # Очистка от некоторых спецсимволов, ссылок, диакритики
68
  text = re.sub(r'МФА:?\s?\[.*?\]', '', text)
 
84
 
85
  from num2words import num2words
86
 
87
+ def replace_numbers_with_text(input_string):
88
+ def convert_number(match):
89
+ number = match.group(0)
90
+ try:
91
+ return num2words(float(number) if '.' in number else int(number), lang='ru')
92
+ except Exception:
93
+ return number
94
+ return re.sub(r'\d+(\.\d+)?', convert_number, input_string)
95
 
96
  summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
97
 
 
104
  translator = pipeline("translation_en_to_ru", model="Helsinki-NLP/opus-mt-en-ru")
105
 
106
  def text_to_speech(text, output_path="speech.wav"):
107
+ text = replace_numbers_with_text(text)
108
  model = VitsModel.from_pretrained("facebook/mms-tts-rus")
109
  tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
110
 
 
217
  gr.Markdown("## Две демки: Google Cloud Vision и CLIP (с переводом на русский)")
218
 
219
  with gr.Tabs():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  with gr.Tab("Google + Sum + T2S"):
221
  gr.Markdown("### Распознавание достопримечательности (Google)")
222
 
 
241
  outputs=audio_output_g
242
  )
243
 
244
+ with gr.Tab("CLIP + Sum + Translate + T2S"):
245
+ gr.Markdown("### Распознавание (CLIP) и перевод на русский")
246
+
247
+ with gr.Row():
248
+ image_input_c = gr.Image(label="Загрузите фото", type="pil")
249
+ text_input_c = gr.Textbox(label="Или введите название")
250
+
251
+ audio_output_c = gr.Audio(label="Результатт")
252
+
253
+ with gr.Row():
254
+ btn_recognize_c = gr.Button("Распознать и перевести на русский")
255
+ btn_text_c = gr.Button("Поиск по тексту")
256
+
257
+ btn_recognize_c.click(
258
+ fn=process_image_clip,
259
+ inputs=image_input_c,
260
+ outputs=audio_output_c
261
+ )
262
+ btn_text_c.click(
263
+ fn=process_text_clip,
264
+ inputs=text_input_c,
265
+ outputs=audio_output_c
266
+ )
267
+
268
  demo.launch(debug=True)
269
+