Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,43 @@ from transformers import CLIPProcessor, CLIPModel
|
|
26 |
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service_account.json"
|
27 |
########################################
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def clean_text(text):
|
30 |
# Очистка от некоторых спецсимволов, ссылок, диакритики
|
31 |
text = re.sub(r'МФА:?\s?\[.*?\]', '', text)
|
@@ -47,11 +84,14 @@ def clean_text(text):
|
|
47 |
|
48 |
from num2words import num2words
|
49 |
|
50 |
-
def
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
55 |
|
56 |
summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
|
57 |
|
@@ -64,7 +104,7 @@ t2s_pipe = pipeline("text-to-speech", model="facebook/mms-tts-rus")
|
|
64 |
translator = pipeline("translation_en_to_ru", model="Helsinki-NLP/opus-mt-en-ru")
|
65 |
|
66 |
def text_to_speech(text, output_path="speech.wav"):
|
67 |
-
text =
|
68 |
model = VitsModel.from_pretrained("facebook/mms-tts-rus")
|
69 |
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
|
70 |
|
@@ -177,30 +217,6 @@ with gr.Blocks() as demo:
|
|
177 |
gr.Markdown("## Две демки: Google Cloud Vision и CLIP (с переводом на русский)")
|
178 |
|
179 |
with gr.Tabs():
|
180 |
-
with gr.Tab("CLIP + Sum + Translate + T2S"):
|
181 |
-
gr.Markdown("### Распознавание (CLIP) и перевод на русский")
|
182 |
-
|
183 |
-
with gr.Row():
|
184 |
-
image_input_c = gr.Image(label="Загрузите фото", type="pil")
|
185 |
-
text_input_c = gr.Textbox(label="Или введите название")
|
186 |
-
|
187 |
-
audio_output_c = gr.Audio(label="Результатт")
|
188 |
-
|
189 |
-
with gr.Row():
|
190 |
-
btn_recognize_c = gr.Button("Распознать и перевести на русский")
|
191 |
-
btn_text_c = gr.Button("Поиск по тексту")
|
192 |
-
|
193 |
-
btn_recognize_c.click(
|
194 |
-
fn=process_image_clip,
|
195 |
-
inputs=image_input_c,
|
196 |
-
outputs=audio_output_c
|
197 |
-
)
|
198 |
-
btn_text_c.click(
|
199 |
-
fn=process_text_clip,
|
200 |
-
inputs=text_input_c,
|
201 |
-
outputs=audio_output_c
|
202 |
-
)
|
203 |
-
|
204 |
with gr.Tab("Google + Sum + T2S"):
|
205 |
gr.Markdown("### Распознавание достопримечательности (Google)")
|
206 |
|
@@ -225,4 +241,29 @@ with gr.Blocks() as demo:
|
|
225 |
outputs=audio_output_g
|
226 |
)
|
227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
demo.launch(debug=True)
|
|
|
|
26 |
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service_account.json"
|
27 |
########################################
|
28 |
|
29 |
+
# English names of well-known landmarks and natural sights.
# NOTE(review): the consumers of this list are outside this view; presumably
# these are the candidate labels for CLIP matching and/or article lookup —
# confirm against the CLIP handlers before renaming entries.
landmark_titles = [
    "Eiffel Tower", "Taj Mahal", "Statue of Liberty", "Big Ben", "Golden Gate Bridge",
    "Moscow International Business Center", "Colosseum", "Leaning Tower of Pisa",
    "Great Wall of China", "Mount Rushmore", "Sydney Opera House", "Burj Khalifa",
    "Hagia Sophia", "Tower of London", "Notre Dame Cathedral", "Angkor Wat",
    "Petra", "Chichen Itza", "Machu Picchu", "Christ the Redeemer",
    "Acropolis of Athens", "Pyramids of Giza", "Brandenburg Gate", "Sagrada Familia",
    "Versailles Palace", "Santorini", "Niagara Falls", "Banff National Park",
    "Yellowstone National Park", "Grand Canyon", "Victoria Falls", "Mount Everest",
    "Blue Mosque", "Alhambra", "Forbidden City", "Buckingham Palace",
    "Prague Castle", "St. Peter's Basilica", "Matterhorn", "Lake Bled",
    "Stonehenge", "Uluru", "The Louvre", "Hollywood Sign", "Table Mountain",
    "Pompeii", "Edinburgh Castle", "Cappadocia", "Florence Cathedral",
    "Disneyland Paris", "Tokyo Tower", "Mount Fuji", "The Shard",
    "Shwedagon Pagoda", "St. Basil's Cathedral", "Champs-Élysées",
    "Red Square", "Kremlin", "Canals of Venice", "Rialto Bridge",
    "Mont Saint-Michel", "Neuschwanstein Castle", "Schönbrunn Palace",
    "Dubrovnik Old Town", "Lake Baikal", "Himeji Castle", "Berlin Wall",
    "Mecca", "Medina", "Galápagos Islands", "Easter Island",
    "Antelope Canyon", "Plitvice Lakes", "Cinque Terre", "Maldives",
    "Vatican Museums", "Iguazu Falls", "Ha Long Bay", "Jiuzhaigou Valley",
    "Mount Kilimanjaro", "Amazon Rainforest", "Santuario de las Lajas",
    "Giant's Causeway", "Monument Valley", "Moai Statues",
    "Torres del Paine", "Blue Lagoon", "Kruger National Park",
    "White Cliffs of Dover", "Notre-Dame Basilica", "Hohenzollern Castle",
    "Mont Blanc", "Seine River", "Amalfi Coast", "Lake Geneva",
    "Jungfrau Region", "Marina Bay Sands", "Gardens by the Bay",
    # The Chocolate Hills are in Bohol, not Cebu; use the canonical name.
    "Merlion Park", "Petronas Twin Towers", "Chocolate Hills",
    "Borobudur Temple", "Kiyomizu-dera", "Nara Park", "Fushimi Inari Shrine",
    "Sequoia National Park", "Yosemite National Park", "Haleakalā National Park",
    "Pike Place Market", "Old Faithful", "Badlands National Park",
    "Hoover Dam", "Bryce Canyon", "Rocky Mountain National Park",
    "Arches National Park", "Great Barrier Reef", "Whitehaven Beach",
    "Fraser Island", "Uluru-Kata Tjuta National Park", "Kangaroo Island",
    "Sydney Harbour Bridge", "Whitsunday Islands",
]
|
65 |
+
|
66 |
def clean_text(text):
|
67 |
# Очистка от некоторых спецсимволов, ссылок, диакритики
|
68 |
text = re.sub(r'МФА:?\s?\[.*?\]', '', text)
|
|
|
84 |
|
85 |
from num2words import num2words
|
86 |
|
87 |
+
def replace_numbers_with_text(input_string):
    """Spell out every number in ``input_string`` as Russian words.

    Each run of digits, optionally followed by a dot and more digits, is
    passed to ``num2words`` with ``lang='ru'``. A match containing a dot is
    converted as a float, any other match as an int. If the conversion
    raises, the matched digits are left in the text unchanged.

    Args:
        input_string: Text that may contain numbers written with digits.

    Returns:
        The text with each successfully converted number replaced by words.
    """
    def _spell(found):
        digits = found.group(0)
        # A dot marks a decimal fraction; anything else is a whole number.
        if '.' in digits:
            parse = float
        else:
            parse = int
        try:
            return num2words(parse(digits), lang='ru')
        except Exception:
            # Best-effort: keep the original digits when spelling fails.
            return digits

    number_pattern = re.compile(r'\d+(\.\d+)?')
    return number_pattern.sub(_spell, input_string)
|
95 |
|
96 |
summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
|
97 |
|
|
|
104 |
translator = pipeline("translation_en_to_ru", model="Helsinki-NLP/opus-mt-en-ru")
|
105 |
|
106 |
def text_to_speech(text, output_path="speech.wav"):
|
107 |
+
text = replace_numbers_with_text(text)
|
108 |
model = VitsModel.from_pretrained("facebook/mms-tts-rus")
|
109 |
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
|
110 |
|
|
|
217 |
gr.Markdown("## Две демки: Google Cloud Vision и CLIP (с переводом на русский)")
|
218 |
|
219 |
with gr.Tabs():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
with gr.Tab("Google + Sum + T2S"):
|
221 |
gr.Markdown("### Распознавание достопримечательности (Google)")
|
222 |
|
|
|
241 |
outputs=audio_output_g
|
242 |
)
|
243 |
|
244 |
+
# Tab that wires the CLIP handlers (process_image_clip / process_text_clip,
# defined outside this block) to an image input, a text input and one shared
# audio output.
with gr.Tab("CLIP + Sum + Translate + T2S"):
    gr.Markdown("### Распознавание (CLIP) и перевод на русский")

    # Two alternative inputs: an uploaded photo (type="pil") or a typed name.
    with gr.Row():
        image_input_c = gr.Image(label="Загрузите фото", type="pil")
        text_input_c = gr.Textbox(label="Или введите название")

    # Both buttons below send their result to this single audio component.
    # Label typo fixed: "Результатт" -> "Результат".
    audio_output_c = gr.Audio(label="Результат")

    with gr.Row():
        btn_recognize_c = gr.Button("Распознать и перевести на русский")
        btn_text_c = gr.Button("Поиск по тексту")

    # Image path: the uploaded photo goes to process_image_clip.
    btn_recognize_c.click(
        fn=process_image_clip,
        inputs=image_input_c,
        outputs=audio_output_c
    )
    # Text path: the typed name goes to process_text_clip.
    btn_text_c.click(
        fn=process_text_clip,
        inputs=text_input_c,
        outputs=audio_output_c
    )
|
267 |
+
|
268 |
demo.launch(debug=True)
|
269 |
+
|