Spaces:

speaches-ai
/

speaches

Running on CPU Upgrade

App Files Files Community

Fedir Zadniprovskyi committed on Sep 2, 2024

Commit

93d8861

1 Parent(s): 49f71ac

chore: minor changes to scripts/client.py

Browse files

Files changed (2) hide show

pyproject.toml +1 -0
scripts/client.py +26 -22

pyproject.toml CHANGED Viewed

@@ -70,6 +70,7 @@ ignore = [
     "W505",
     "ISC001", # recommended to disable for formatting
     "INP001",
 ]
 [tool.ruff.lint.isort]

     "W505",
     "ISC001", # recommended to disable for formatting
     "INP001",
+    "PT018",
 ]
 [tool.ruff.lint.isort]

scripts/client.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 from pathlib import Path
 import subprocess
 import threading
 import httpx
 import keyboard
@@ -14,11 +15,12 @@ import keyboard
 # The audio file will be sent to the server for transcription.
 # The transcription will be copied to the clipboard.
 # When having a short audio of a couple of sentences and running inference on a GPU the response time is very fast (less than 2 seconds).  # noqa: E501
 CHUNK = 2**12
 AUDIO_RECORD_CMD = [
     "ffmpeg",
-    # "-hide_banner",
     # "-loglevel",
     # "quiet",
     "-f",
@@ -27,15 +29,6 @@ AUDIO_RECORD_CMD = [
     "default",
     "-f",
     "wav",
-    # "-ac",
-    # "1",
-    # "-ar",
-    # "16000",
-    # "-f",
-    # "s16le",
-    # "-acodec",
-    # "pcm_s16le",
-    # "-",
 ]
 COPY_TO_CLIPBOARD_CMD = "wl-copy"
 OPENAI_BASE_URL = "ws://localhost:8000/v1"
@@ -48,12 +41,13 @@ RESPONSE_FORMAT = "text"
 client = httpx.Client(base_url=OPENAI_BASE_URL, timeout=TIMEOUT)
 is_running = threading.Event()
-file = Path("test.wav")  # TODO: use tempfile
 while True:
     keyboard.wait(KEYBIND)
-    print("Action started")
     process = subprocess.Popen(
         [*AUDIO_RECORD_CMD, "-y", str(file.name)],
         stdout=subprocess.PIPE,
@@ -63,17 +57,27 @@ while True:
     )
     keyboard.wait(KEYBIND)
     process.kill()
-    print("Action finished")
-    with open(file, "rb") as f:
-        res = client.post(
-            OPENAI_BASE_URL + TRANSCRIBE_PATH,
-            files={"file": f},
-            data={
-                "response_format": RESPONSE_FORMAT,
-                "language": LANGUAGE,
-            },
-        )
         transcription = res.text
         print(transcription)
         subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)

 from pathlib import Path
 import subprocess
 import threading
+import time
 import httpx
 import keyboard
 # The audio file will be sent to the server for transcription.
 # The transcription will be copied to the clipboard.
 # When having a short audio of a couple of sentences and running inference on a GPU the response time is very fast (less than 2 seconds).  # noqa: E501
+# Run this with `sudo -E python scripts/client.py`
 CHUNK = 2**12
 AUDIO_RECORD_CMD = [
     "ffmpeg",
+    "-hide_banner",
     # "-loglevel",
     # "quiet",
     "-f",
     "default",
     "-f",
     "wav",
 ]
 COPY_TO_CLIPBOARD_CMD = "wl-copy"
 OPENAI_BASE_URL = "ws://localhost:8000/v1"
 client = httpx.Client(base_url=OPENAI_BASE_URL, timeout=TIMEOUT)
 is_running = threading.Event()
+file = Path("test.wav")  # HACK: I had a hard time trying to use a temporary file due to permissions issues
 while True:
     keyboard.wait(KEYBIND)
+    print("Recording started")
     process = subprocess.Popen(
         [*AUDIO_RECORD_CMD, "-y", str(file.name)],
         stdout=subprocess.PIPE,
     )
     keyboard.wait(KEYBIND)
     process.kill()
+    stdout, stderr = process.communicate()
+    if stdout or stderr:
+        print(f"stdout: {stdout}")
+        print(f"stderr: {stderr}")
+    print(f"Recording finished. File size: {file.stat().st_size} bytes")
+    try:
+        with open(file, "rb") as fd:
+            start = time.perf_counter()
+            res = client.post(
+                OPENAI_BASE_URL + TRANSCRIBE_PATH,
+                files={"file": fd},
+                data={
+                    "response_format": RESPONSE_FORMAT,
+                    "language": LANGUAGE,
+                },
+            )
+        end = time.perf_counter()
+        print(f"Transcription took {end - start} seconds")
         transcription = res.text
         print(transcription)
         subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)
+    except httpx.ConnectError as e:
+        print(f"Couldn't connect to server: {e}")