atlury committed
Commit bea6e1a · verified · 1 Parent(s): 536e020

Update index.html

Files changed (1):
  index.html +148 -74

index.html CHANGED
@@ -3,27 +3,143 @@
  <head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>Voice Chat Bot with Advanced Echo Cancellation and TinyLLM</title>
+ <title>Voice Chat Bot with Advanced Echo Cancellation</title>
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
+

  <style>
- /* ... (previous styles remain unchanged) ... */
- #model-progress {
+ body {
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+ margin: 0;
+ padding: 20px;
+ background-color: #1a1a1a;
+ color: #f0f0f0;
+ }
+ .container {
+ max-width: 800px;
+ margin: 0 auto;
+ }
+ h1 {
+ color: #ffd700;
+ text-align: center;
+ margin-bottom: 10px;
+ }
+ .subtitle {
+ text-align: center;
+ color: #ffd700;
+ margin-bottom: 20px;
+ }
+ #chat-container {
+ display: flex;
+ flex-direction: column;
+ height: 70vh;
+ }
+ #conversation {
+ flex-grow: 1;
+ border: 1px solid #444;
+ padding: 10px;
+ overflow-y: scroll;
+ background-color: #2a2a2a;
+ border-radius: 5px;
+ margin-bottom: 20px;
+ }
+ #controls {
+ display: flex;
+ justify-content: center;
+ margin-bottom: 20px;
+ }
+ button {
+ font-size: 18px;
+ padding: 10px 20px;
+ background-color: #ffd700;
+ color: #1a1a1a;
+ border: none;
+ border-radius: 5px;
+ cursor: pointer;
+ transition: background-color 0.3s;
+ }
+ button:hover {
+ background-color: #ffec8b;
+ }
+ button:disabled {
+ background-color: #666;
+ cursor: not-allowed;
+ }
+ #visualizer {
  width: 100%;
- background-color: #444;
+ height: 100px;
+ background-color: #2a2a2a;
  border-radius: 5px;
- margin-top: 10px;
  overflow: hidden;
+ margin-bottom: 20px;
  }
- #model-progress-bar {
- width: 0;
- height: 20px;
+ .bar {
+ width: 5px;
+ height: 100%;
  background-color: #ffd700;
- text-align: center;
- line-height: 20px;
- color: #1a1a1a;
+ display: inline-block;
+ margin-right: 1px;
+ }
+ #loading {
+ position: fixed;
+ top: 0;
+ left: 0;
+ width: 100%;
+ height: 100%;
+ background-color: rgba(0, 0, 0, 0.8);
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ z-index: 1000;
+ }
+ .spinner {
+ width: 50px;
+ height: 50px;
+ border: 5px solid #f3f3f3;
+ border-top: 5px solid #ffd700;
+ border-radius: 50%;
+ animation: spin 1s linear infinite;
+ }
+ @keyframes spin {
+ 0% { transform: rotate(0deg); }
+ 100% { transform: rotate(360deg); }
+ }
+ #configuration {
+ margin-bottom: 20px;
+ }
+ select {
+ width: 100%;
+ padding: 10px;
+ font-size: 16px;
+ background-color: #2a2a2a;
+ color: #f0f0f0;
+ border: 1px solid #444;
+ border-radius: 5px;
+ }
+ #model-info {
+ margin-top: 10px;
+ font-size: 14px;
+ color: #aaa;
+ }
+ #logs {
+ background-color: #2a2a2a;
+ border: 1px solid #444;
+ border-radius: 5px;
+ padding: 10px;
+ height: 200px;
+ overflow-y: scroll;
+ font-family: monospace;
+ font-size: 14px;
+ }
+ #clear-logs {
+ margin-top: 10px;
+ font-size: 14px;
+ padding: 5px 10px;
+ }
+ #localVideo, #remoteVideo {
+ display: none;
  }
  </style>
  </head>
@@ -45,10 +161,7 @@
  <option value="quality">Highest Quality</option>
  </select>
  <div id="model-info">
- TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
- </div>
- <div id="model-progress">
- <div id="model-progress-bar"></div>
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/gpt2-tiny-english
  </div>
  </div>
  <div id="visualizer"></div>
@@ -65,6 +178,8 @@
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

  env.localModelPath = './models';
+
+ // Configure environment before initializing pipelines
  env.backends = ['wasm'];
  env.wasm = env.wasm || {};
  env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
@@ -79,7 +194,6 @@
  const clearLogsButton = document.getElementById('clear-logs');
  const localVideo = document.getElementById('localVideo');
  const remoteVideo = document.getElementById('remoteVideo');
- const modelProgressBar = document.getElementById('model-progress-bar');

  let myvad;
  let sttPipeline;
@@ -120,38 +234,13 @@
  async function initializePipelines() {
  try {
  addLog('System: Initializing pipelines...');
- const tasks = [
- { name: 'STT', task: 'automatic-speech-recognition', model: 'Xenova/whisper-tiny.en' },
- { name: 'TTS', task: 'text-to-speech', model: 'Xenova/mms-tts-eng' },
- { name: 'LLM', task: 'text-generation', model: 'Xenova/tiny-llm' }
- ];
-
- for (const [index, task] of tasks.entries()) {
- addLog(`System: Loading ${task.name} model...`);
- updateProgressBar((index / tasks.length) * 100);
- const pipelineInstance = await pipeline(task.task, task.model, {
- quantized: true,
- progress_callback: (progress) => {
- updateProgressBar(((index + progress) / tasks.length) * 100);
- }
- });
- addLog(`System: ${task.name} model loaded successfully.`);
-
- switch (task.name) {
- case 'STT':
- sttPipeline = pipelineInstance;
- break;
- case 'TTS':
- ttsPipeline = pipelineInstance;
- break;
- case 'LLM':
- llmPipeline = pipelineInstance;
- break;
- }
- }
-
- updateProgressBar(100);
- addLog('System: All pipelines initialized successfully.');
+ [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
+ pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
+ pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
+ pipeline('text-generation', 'Xenova/gpt2-tiny-english', { quantized: true })
+ ]);
+
+ addLog('System: Digital Human Voice Chat initialized with GPT-2 Tiny. Click "Begin Call" to start.');
  startButton.disabled = false;
  loadingDiv.style.display = 'none';
  } catch (error) {
@@ -161,22 +250,15 @@
  }
  }

- function updateProgressBar(percentage) {
- modelProgressBar.style.width = `${percentage}%`;
- modelProgressBar.textContent = `${Math.round(percentage)}%`;
- }
-
  async function processSpeech(audio) {
  try {
  if (!sttPipeline || !ttsPipeline || !llmPipeline) {
  throw new Error('Pipelines not initialized');
  }

- addLog('System: Processing speech...');
  const transcription = await sttPipeline(audio);
  addLog(`User: ${transcription.text}`);

- addLog('System: Generating LLM response...');
  const llmResponse = await llmPipeline(transcription.text, {
  max_new_tokens: 50,
  temperature: 0.7
184
  const botResponse = llmResponse[0].generated_text;
185
  addLog(`Bot: ${botResponse}`);
186
 
187
- addLog('System: Generating speech from response...');
188
  isSpeaking = true;
189
  const speechOutput = await ttsPipeline(botResponse);
190
  await playAudio(speechOutput.audio);
191
  isSpeaking = false;
192
- addLog('System: Speech playback complete.');
193
  } catch (error) {
194
  console.error('Error processing speech:', error);
195
  addLog(`System: Error processing speech: ${error.message}`);
@@ -204,7 +284,6 @@
  messageElement.textContent = logMessage;
  logsDiv.appendChild(messageElement);
  logsDiv.scrollTop = logsDiv.scrollHeight;
- console.log(logMessage);
  }

  function playAudio(audioArray) {
@@ -243,7 +322,6 @@

  async function startListening() {
  try {
- addLog('System: Initializing audio context and stream...');
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
  analyser = audioContext.createAnalyser();
  analyser.fftSize = 128;
@@ -251,10 +329,12 @@

  localVideo.volume = 0;
  localVideo.muted = true;
+ document.getElementById('localVideo').volume = 0;
+
  remoteVideo.volume = 0;
  remoteVideo.muted = true;
+ document.getElementById('remoteVideo').volume = 0;

- addLog('System: Requesting media stream...');
  microphoneStream = await navigator.mediaDevices.getUserMedia({
  audio: true,
  video: { width: 1, height: 1 }
@@ -263,7 +343,9 @@
  localVideo.srcObject = microphoneStream;
  await localVideo.play();

- addLog('System: Setting up RTCPeerConnection for echo cancellation...');
+ console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
+ console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
+
  const offerOptions = {
  offerToReceiveAudio: true,
  offerToReceiveVideo: false,
@@ -289,21 +371,20 @@
  const source = audioContext.createMediaStreamSource(loopbackStream);
  source.connect(analyser);

- addLog('System: Initializing voice activity detection...');
  myvad = await vad.MicVAD.new({
  noiseSuppression: true,
  aggressiveness: 3,
  onSpeechStart: () => {
- addLog('System: Voice activity detected - speech start');
+ addLog('--- Voice activity: speech start');
  updateVisualizer();
  if (isSpeaking) {
- addLog('System: User interrupted. Stopping bot speech.');
+ addLog('User interrupted. Stopping bot speech.');
  stopCurrentAudio();
  isSpeaking = false;
  }
  },
  onSpeechEnd: (audio) => {
- addLog('System: Voice activity detected - speech end');
+ addLog('--- Voice activity: speech end');
  cancelAnimationFrame(animationId);
  processSpeech(audio);
  }
@@ -312,7 +393,7 @@
  await myvad.start();
  startButton.textContent = 'End Call';
  isListening = true;
- addLog('System: Listening started successfully.');
+ addLog('System: Listening...');
  } catch (error) {
  console.error('Error starting voice activity:', error);
  addLog(`System: Error starting voice detection: ${error.message}`);
@@ -320,26 +401,21 @@
  }

  async function stopListening() {
- addLog('System: Stopping listening...');
  if (myvad) {
  try {
  await myvad.destroy();
- addLog('System: Voice activity detection stopped.');
  } catch (error) {
  console.error('Error stopping voice activity:', error);
- addLog(`System: Error stopping voice activity: ${error.message}`);
  }
  myvad = null;
  }
  if (microphoneStream) {
  microphoneStream.getTracks().forEach(track => track.stop());
  microphoneStream = null;
- addLog('System: Microphone stream stopped.');
  }
  if (audioContext) {
  await audioContext.close();
  audioContext = null;
- addLog('System: Audio context closed.');
  }
  if (localVideo) {
  localVideo.srcObject = null;
@@ -350,12 +426,10 @@
  if (rtcConnection) {
  rtcConnection.close();
  rtcConnection = null;
- addLog('System: RTCPeerConnection closed.');
  }
  if (rtcLoopbackConnection) {
  rtcLoopbackConnection.close();
  rtcLoopbackConnection = null;
- addLog('System: RTCPeerConnection loopback closed.');
  }
  loopbackStream = new MediaStream();
  stopCurrentAudio();
@@ -363,12 +437,12 @@
  isListening = false;
  addLog('System: Stopped listening.');
  cancelAnimationFrame(animationId);
+ addLog('System: Microphone closed');
  }

  startButton.addEventListener('click', toggleListening);
  clearLogsButton.addEventListener('click', () => {
  logsDiv.innerHTML = '';
- addLog('System: Logs cleared.');
  });

  createVisualizer();