Spaces:

CristianMongar
/

Audio_a_texto

Sleeping

Mongar28 committed on May 28, 2024

Commit

9276da4

1 Parent(s): 17ae025

Se hicieron mejoras en la estructura del código

Files changed (5) hide show

app.py CHANGED Viewed

@@ -6,29 +6,41 @@ from documents.docs import generate_docx
 import os
-def main():
     audio_full_path: str = load_audio_file()
-    if audio_full_path:
         with st.spinner("Transcribiendo..."):
-            transcription = whisper_os(audio_full_path)
             def transcription_generator():
                 for word in transcription["text"].split(' '):
                     time.sleep(0.2)
                     yield word + ' '
             st.write_stream(transcription_generator(), )
             # st.write(transcription)
-            #
             generate_docx(transcription["text"])
-    if os.path.exists(st.session_state.full_path_docx):
         with open(st.session_state.full_path_docx, "rb") as file:
             btn = st.download_button(
                 label="Download docx",
                 data=file,
                 file_name=st.session_state.full_path_docx,
             )
 if __name__ == "__main__":
-    main()

 import os
+pipe = whisper_os()
+def main(pipe):
     audio_full_path: str = load_audio_file()
+    if "validador" not in st.session_state.keys():
+        st.session_state.validador = True
+    if st.session_state.validador == True and audio_full_path:
         with st.spinner("Transcribiendo..."):
+            transcription = pipe(audio_full_path, return_timestamps=True,
+                                 generate_kwargs={"language": "spanish"})
             def transcription_generator():
                 for word in transcription["text"].split(' '):
                     time.sleep(0.2)
                     yield word + ' '
             st.write_stream(transcription_generator(), )
+            if "transcription" not in st.session_state.keys():
+                st.session_state.transcription = transcription["text"]
             # st.write(transcription)
             generate_docx(transcription["text"])
+            st.session_state.validador = False
+    if audio_full_path and os.path.exists(st.session_state.full_path_docx):
         with open(st.session_state.full_path_docx, "rb") as file:
             btn = st.download_button(
                 label="Download docx",
                 data=file,
                 file_name=st.session_state.full_path_docx,
             )
+            if btn:
+                st.write(st.session_state.transcription)
 if __name__ == "__main__":
+    main(pipe)

documents/__pycache__/docs.cpython-312.pyc CHANGED Viewed

Binary files a/documents/__pycache__/docs.cpython-312.pyc and b/documents/__pycache__/docs.cpython-312.pyc differ

documents/docs/nota_de_voz.docx ADDED Viewed

Binary file (36.7 kB). View file

openai_models/whisper.py CHANGED Viewed

@@ -1,8 +1,10 @@
 from transformers import pipeline, AutoModelForCausalLM, AutoModelForSpeechSeq2Seq, AutoProcessor
 import torch
-def whisper_os(audio_full_path: str):
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -32,6 +34,4 @@ def whisper_os(audio_full_path: str):
         device=device,
     )
-    result = pipe(audio_full_path, return_timestamps=True,
-                  generate_kwargs={"language": "spanish"})
-    return result

 from transformers import pipeline, AutoModelForCausalLM, AutoModelForSpeechSeq2Seq, AutoProcessor
 import torch
+import streamlit as st
+@st.cache_resource
+def whisper_os():
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
         device=device,
     )
+    return pipe

streamlit_tools/tools.py CHANGED Viewed

@@ -13,7 +13,7 @@ def load_audio_file() -> str:
     Returns:
     str
     """
-    st.markdown('# **Pastora**')
     st.markdown('### *Transcripción de audio a texto*')
     audio_file = st.file_uploader("Drag your audio file", type=[
                                   '.mp3', '.m4a', '.ogg', '.aac'])

     Returns:
     str
     """
+    st.markdown('# **Díctamelo**')
     st.markdown('### *Transcripción de audio a texto*')
     audio_file = st.file_uploader("Drag your audio file", type=[
                                   '.mp3', '.m4a', '.ogg', '.aac'])