Simranjit committed on
Commit
153a73d
1 Parent(s): 28dbd56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -10
app.py CHANGED
@@ -1,11 +1,28 @@
1
- import gradio as gr
2
  import requests
3
  import os
4
- import numpy as np
5
  from scipy.io.wavfile import write
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  token_hf = os.environ.get('token_hf', None)
8
- print(token_hf)
9
 
10
  API_URL = "https://tfugbov5t776omzd.us-east-1.aws.endpoints.huggingface.cloud"
11
  headers = {
@@ -18,17 +35,90 @@ def query(data):
18
  with open("test.wav", "rb") as f:
19
  ndata = f.read()
20
  response = requests.post(API_URL, headers=headers, data=ndata)
21
- print(response)
22
  return response.json()
23
 
24
  def greet(audio):
25
- print(audio[0])
26
- print(audio[1])
27
- rate = 44100
28
  write('test.wav', audio[0], audio[1])
29
  output = query(audio)
30
- print(output)
31
- return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- demo = gr.Interface(fn=greet, inputs="audio", outputs="text")
34
  demo.launch()
 
 
1
  import requests
2
  import os
 
3
  from scipy.io.wavfile import write
4
 
5
+ import gradio as gr
6
+ import numpy as np
7
+ import uuid
8
+ #import boto3
9
+ import datetime
10
+ import time
11
+
12
+ # access_key = os.environ.get('access_key', None)
13
+ # secret_access_key = os.environ.get('secret_access_key', None)
14
+
15
+ # session = boto3.Session(
16
+ # aws_access_key_id=access_key,
17
+ # aws_secret_access_key=secret_access_key,
18
+ # )
19
+
20
+ # s3 = session.resource('s3')
21
+
22
+ # BUCKET = "audio-text-938"
23
+
24
+
25
  token_hf = os.environ.get('token_hf', None)
 
26
 
27
  API_URL = "https://tfugbov5t776omzd.us-east-1.aws.endpoints.huggingface.cloud"
28
  headers = {
 
35
  with open("test.wav", "rb") as f:
36
  ndata = f.read()
37
  response = requests.post(API_URL, headers=headers, data=ndata)
 
38
  return response.json()
39
 
def greet(audio):
    """Send one recording to the transcription endpoint and return its text.

    Args:
        audio: Indexable pair; ``audio[0]`` is passed to
            ``scipy.io.wavfile.write`` as the sample rate and ``audio[1]``
            as the sample array.

    Returns:
        The value stored under the ``"text"`` key of the endpoint's JSON reply.

    Raises:
        KeyError: If the reply has no ``"text"`` key. The message includes the
            payload that was received so the failure can be diagnosed.
    """
    # query() reads the recording back from this fixed path.
    # NOTE(review): the file name is shared by every caller, so concurrent
    # sessions can overwrite each other's audio - confirm this is acceptable.
    write('test.wav', audio[0], audio[1])
    output = query(audio)
    try:
        return output["text"]
    except KeyError as err:
        # Same exception type as before, but with the payload attached instead
        # of an opaque KeyError('text').
        raise KeyError(
            f"transcription endpoint reply has no 'text' field: {output!r}"
        ) from err
44
+
# Startup diagnostic: list what is visible three directories above the
# current working directory, where the data folder is expected to live.
print("cur path", os.listdir(os.path.join("..", "..", "..")))

# Create the storage layout. makedirs(exist_ok=True) also creates a missing
# "data" parent and avoids the check-then-create race of isdir() + mkdir().
_data_root = os.path.join("..", "..", "..", "data")
for _subdir in ("hfcache", "audio", "audio_texts"):
    os.makedirs(os.path.join(_data_root, _subdir), exist_ok=True)

# Point the Hugging Face cache at the data folder.
os.environ["HF_HOME"] = os.path.join(_data_root, "hfcache")
57
+
58
+
def post_process(text):
    """Turn dictated French punctuation and layout commands into symbols.

    Spoken commands such as "virgule", "point", "deux points",
    "point d'interrogation" (including common misspellings), "nouvelle ligne",
    "à la ligne" and "paragraphe" are replaced by the character(s) they name.
    Each resulting line is then stripped of surrounding whitespace.

    A command is only recognised as a whole word: " point" no longer fires
    inside " points" or " pointe", and "à la ligne" no longer fires inside
    "voilà la ligne".

    Args:
        text: Raw transcription.

    Returns:
        The transcription with commands replaced and every line stripped.
    """
    import re  # function-scope import: no file-level import change required

    # Order matters: the question-mark and colon commands contain "point" and
    # must run before the bare " point" rule; likewise "nouveau paragraphe"
    # must run before "paragraphe".
    commands = (
        ("nouvelle ligne", "\n"),
        ("à la ligne", "\n"),
        ("point d'intérogation", "?"),
        ("point d'intérrogation", "?"),
        ("point d'interrogation", "?"),
        ("point d'interogation", "?"),
        (" virgule", ","),
        (" deux points", ":"),
        (" point", "."),
        (" nouveau paragraphe ", "\n\n"),
        (" paragraphe ", "\n\n"),
    )

    for spoken, symbol in commands:
        pattern = re.escape(spoken)
        # Guard only the edges that are letters; edges that are already a
        # space delimit the word by themselves.
        if spoken[0].isalnum():
            pattern = r"(?<!\w)" + pattern
        if spoken[-1].isalnum():
            pattern += r"(?!\w)"
        text = re.sub(pattern, symbol, text)

    return "\n".join(line.strip() for line in text.split("\n"))
78
+
def transcribe(state, audio):
    """Streaming callback: append a new audio chunk and re-transcribe everything.

    Args:
        state: Samples accumulated by previous calls, or ``None`` on the
            first call.
        audio: ``(sr, y)`` pair for the new chunk; ``y`` is a NumPy array and
            ``sr`` is forwarded to ``greet`` alongside the samples.

    Returns:
        ``(state, text)``: the updated accumulated samples and the
        post-processed transcription of the whole accumulated signal.
    """
    sr, y = audio
    y = y.astype(np.float32)

    # Peak-normalise the chunk. A silent (all-zero) chunk has peak 0 and an
    # empty chunk has no peak at all; dividing there would fill the rolling
    # buffer with NaNs or raise, so such chunks are left untouched.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    state = y if state is None else np.concatenate([state, y])

    text = post_process(greet([sr, state]))

    return state, text
95
+
96
+
def save_fn(audio, text):
    """Persist the current recording and its transcript under a fresh UUID.

    Writes ``<uuid>.txt`` and ``<uuid>.wav`` to the current working directory.
    When module-level ``s3`` and ``BUCKET`` are defined, both files are also
    uploaded to ``texts/`` and ``audios/`` in that bucket.

    Args:
        audio: ``(sr, y)`` pair with the sample rate and a NumPy sample array,
            or ``None`` when nothing has been recorded.
        text: Transcript to store.

    Returns:
        ``[None, None, ""]`` after a successful save. When ``audio`` is
        ``None`` nothing is written and ``[None, None, text]`` is returned so
        the transcript is not discarded.
    """
    if audio is None:
        return [None, None, text]

    sr, y = audio
    y = y.astype(np.float32)

    # Peak-normalise; skip silent or empty recordings to avoid dividing by zero.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    uid = str(uuid.uuid4())
    text_path = f"{uid}.txt"
    audio_path = f"{uid}.wav"

    with open(text_path, "w", encoding="utf-8") as f:
        f.write(text)
    write(audio_path, sr, y)

    # The boto3 session is commented out in this file, so `s3` / `BUCKET` may
    # not exist; upload only when both are configured instead of raising
    # NameError after the local files were already written.
    s3_resource = globals().get("s3")
    bucket_name = globals().get("BUCKET")
    if s3_resource is not None and bucket_name is not None:
        bucket = s3_resource.Bucket(bucket_name)
        bucket.upload_file(text_path, f"texts/{uid}.txt")  # local path, bucket path
        bucket.upload_file(audio_path, f"audios/{uid}.wav")  # local path, bucket path

    return [None, None, ""]
112
+
# Streaming dictation UI: microphone audio is transcribed chunk by chunk into
# a text area, and a button stores the current audio/text pair.
with gr.Blocks() as demo:
    # Samples accumulated across stream callbacks (see transcribe).
    state = gr.State(None)

    # Declared but not wired to any event in this file.
    current_speaches = gr.State(1)
    old_text = gr.State("")
    last_text = gr.State("")

    audio = gr.Audio(streaming=True)
    text = gr.TextArea(show_copy_button=True)

    # Each streamed chunk updates the accumulated state and the transcript.
    audio.stream(
        fn=transcribe,
        inputs=[state, audio],
        outputs=[state, text],
    )

    # Saving resets the state, the audio widget and the text area.
    save = gr.Button("save")
    save.click(
        fn=save_fn,
        inputs=[audio, text],
        outputs=[state, audio, text],
    )

demo.launch()