Commit · b49c7c6
Parent(s): 7adca4e

Allegements (French: "slimming down")

Files changed:
- requirements.txt +0 -10
- tabs/data_viz_tab.py +0 -3
- tabs/exploration_tab.py +1 -2
- tabs/id_lang_tab.py +0 -3
- tabs/modelisation_dict_tab.py +0 -1
- tabs/modelisation_seq2seq_tab.py +26 -16
requirements.txt CHANGED

@@ -6,16 +6,13 @@ numpy==1.23.5
 seaborn==0.13.2
 nltk==3.8.1
 scikit-learn==1.1.3
-scikit-learn-extra==0.3.0
 gensim==4.3.2
 sacrebleu==2.4.0
-pyspellchecker==0.8.1
 spacy==3.6.0
 https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
 https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.6.0/fr_core_news_sm-3.6.0.tar.gz
 pillow==9.5.0
 wordcloud==1.9.3
-pathlib==1.0.1
 networkx==2.7.0
 transformers==4.37.2
 keras-nlp==0.6.1
@@ -23,13 +20,9 @@ keras==2.12.0
 tensorflow==2.12.0
 sentencepiece==0.1.99
 openai-whisper==20231117
-sounddevice==0.4.6
 torch==2.2.0
-xformers==0.0.24
-translate==3.6.1
 speechrecognition==3.10.1
 audio_recorder_streamlit==0.0.8
-wave==0.0.2
 whisper==1.1.10
 wavio==0.0.8
 filesplit==4.0.1
@@ -39,7 +32,4 @@ graphviz==0.20.1
 gTTS==2.5.1
 https://files.pythonhosted.org/packages/cc/58/96aff0e5cb8b59c06232ea7e249ed902d04ec89f52636f5be06ceb0855fe/extra_streamlit_components-0.1.60-py3-none-any.whl
 streamlit-option-menu==0.3.12
-plotly==5.18.0
-bokeh==3.3.4
-shap==0.44.1
 deep-translator==1.11.4
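Most of the dropped pins match imports removed in the tab files below (plotly in tabs/data_viz_tab.py, translate in tabs/modelisation_seq2seq_tab.py). The pathlib and wave pins additionally need no replacement, since both names are standard-library modules on Python 3. A trivial check of that last point, as a sketch (any Python >= 3.4 interpreter):

```python
# pathlib and wave ship with the standard library on Python 3,
# so their PyPI pins were redundant for this app.
import importlib

for mod in ("pathlib", "wave"):
    importlib.import_module(mod)  # raises ImportError if truly missing
    print(mod, "provided by the standard library")
```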
tabs/data_viz_tab.py CHANGED

@@ -7,9 +7,6 @@ import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
-import plotly.express as px
-import plotly.graph_objects as go
-import plotly.figure_factory as ff
 from wordcloud import WordCloud
 import nltk
 from nltk.corpus import stopwords
tabs/exploration_tab.py CHANGED

@@ -1,6 +1,5 @@
 import streamlit as st
 import os
-import numpy as np
 import pandas as pd
 import collections
 from nltk.tokenize import word_tokenize
@@ -8,7 +7,7 @@ from nltk import download
 from ast import literal_eval
 from translate_app import tr
 if st.session_state.Cloud == 0:
-    import nltk
+    # import nltk
     import contextlib
     import re
     from nltk.corpus import stopwords
tabs/id_lang_tab.py CHANGED

@@ -1,10 +1,8 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
-import os
 import matplotlib.pyplot as plt
 import tiktoken
-import random
 import joblib
 import json
 import csv
@@ -12,7+10,6 @@ from transformers import pipeline
 import keras
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 from sklearn.preprocessing import LabelEncoder
-from sklearn.feature_extraction.text import CountVectorizer
 from tensorflow.keras.utils import plot_model
 from filesplit.merge import Merge
 from extra_streamlit_components import tab_bar, TabBarItemData
tabs/modelisation_dict_tab.py CHANGED

@@ -1,6 +1,5 @@
 import streamlit as st
 import pandas as pd
-import numpy as np
 import os
 from sacrebleu import corpus_bleu
 if st.session_state.Cloud == 0:
tabs/modelisation_seq2seq_tab.py CHANGED

@@ -4,12 +4,12 @@ import numpy as np
 import os
 from sacrebleu import corpus_bleu
 from transformers import pipeline
-from translate import Translator
+# from translate import Translator
+from deep_translator import GoogleTranslator
 from audio_recorder_streamlit import audio_recorder
 import speech_recognition as sr
 import whisper
 import io
-# import wave
 import wavio
 from filesplit.merge import Merge
 import tensorflow as tf
@@ -19,7 +19,7 @@ from tensorflow import keras
 from keras_nlp.layers import TransformerEncoder
 from tensorflow.keras import layers
 from tensorflow.keras.utils import plot_model
-from PIL import Image
+# from PIL import Image
 from gtts import gTTS
 from extra_streamlit_components import tab_bar, TabBarItemData
 from translate_app import tr
@@ -463,7 +463,8 @@ def run():
         with col2:
             st.write(":red[**Trad. Google Translate**]")
             try:
-                translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
+                # translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
+                translator = GoogleTranslator(source=Lang_detected, target=l_tgt)
                 if custom_sentence!="":
                     translation = translator.translate(custom_sentence)
                     st.write("**"+l_tgt+" :** "+translation)
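The core change of this commit: the translate package's Translator(to_lang=..., from_lang=...) is replaced by deep-translator's GoogleTranslator(source=..., target=...). The .translate(text) call itself is unchanged, so only the constructor differs. A minimal sketch of the new call pattern ("en" and "fr" are illustrative language codes, not values from this commit):

```python
# Minimal sketch of the deep-translator call pattern adopted in this hunk.
# "en" and "fr" are illustrative ISO 639-1 codes, not values from the commit.
from deep_translator import GoogleTranslator

translator = GoogleTranslator(source="en", target="fr")
print(translator.translate("This sentence is translated by Google."))
```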
@@ -488,31 +489,39 @@ def run():
             st.write("## **"+tr("Résultats")+" :**\n")
             st.audio(audio_bytes, format="audio/wav")
             try:
-                [2 removed lines: content not captured in this view]
-                audio_stream_bytesio = io.BytesIO(audio_bytes)
+                # Create a BytesIO object from the audio stream
+                audio_stream_bytesio = io.BytesIO(audio_bytes)
 
-                [2 removed lines: content not captured in this view]
+                # Read the WAV stream using wavio
+                wav = wavio.read(audio_stream_bytesio)
 
-                [2 removed lines: content not captured in this view]
+                # Extract the audio data from the wavio.Wav object
+                audio_data = wav.data
 
-                [4 removed lines: content not captured in this view]
+                # Convert the audio data to a NumPy array
+                audio_input = np.array(audio_data, dtype=np.float32)
+                audio_input = np.mean(audio_input, axis=1)/32768
+
+                if detection:
                     result = model_speech.transcribe(audio_input)
                     st.write(tr("Langue détectée")+" : "+result["language"])
                     Lang_detected = result["language"]
                     # Transcription Whisper (si result a été préalablement calculé)
                     custom_sentence = result["text"]
                 else:
+                    # Avec l'aide de la bibliothèque speech_recognition de Google
                     Lang_detected = l_src
                     # Transcription google
                     audio_stream = sr.AudioData(audio_bytes, 32000, 2)
                     r = sr.Recognizer()
                     custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
+
+                    # Sans la bibliothèque speech_recognition, uniquement avec Whisper
+                    '''
+                    Lang_detected = l_src
+                    result = model_speech.transcribe(audio_input, language=Lang_detected)
+                    custom_sentence = result["text"]
+                    '''
 
                 if custom_sentence!="":
                     # Lang_detected = lang_classifier (custom_sentence)[0]['label']
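The rewritten try-block turns the raw recording into the input Whisper expects: wavio decodes the WAV bytes into int16 samples of shape (frames, channels), which are averaged down to mono and scaled by 1/32768 into the [-1, 1] float range. A self-contained sketch of the same path, assuming a 16-bit stereo recording ("recording.wav" and the "base" model size are placeholders, not values from the commit):

```python
# Sketch of the WAV-bytes -> Whisper pipeline added in this hunk.
# "recording.wav" and the "base" model size are placeholder assumptions.
import io

import numpy as np
import wavio
import whisper

with open("recording.wav", "rb") as f:
    audio_bytes = f.read()

wav = wavio.read(io.BytesIO(audio_bytes))       # decode the WAV container
samples = np.array(wav.data, dtype=np.float32)  # shape: (n_frames, n_channels)
audio_input = np.mean(samples, axis=1) / 32768  # mono, scaled to [-1, 1]

model = whisper.load_model("base")
result = model.transcribe(audio_input)          # Whisper assumes 16 kHz mono float32
print(result["language"], result["text"])
```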
@@ -520,7 +529,8 @@ def run():
                 st.write("")
                 st.write("**"+Lang_detected+" :** :blue["+custom_sentence+"]")
                 st.write("")
-                translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
+                # translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
+                translator = GoogleTranslator(source=Lang_detected, target=l_tgt)
                 translation = translator.translate(custom_sentence)
                 st.write("**"+l_tgt+" :** "+translation)
                 st.write("")
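The triple-quoted block added to the else-branch ("Sans la bibliothèque speech_recognition, uniquement avec Whisper": without the speech_recognition library, using Whisper only) keeps a disabled variant that forces the source language instead of letting Whisper detect it. A hedged sketch of that variant, reusing model and audio_input from the previous sketch ("fr" is an illustrative code):

```python
# Passing language= skips Whisper's language-detection pass.
# model and audio_input are assumed defined as in the sketch above.
result = model.transcribe(audio_input, language="fr")
custom_sentence = result["text"]
```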