atlury committed on
Commit
b15f4c6
·
verified ·
1 Parent(s): 08925c7

Create index.backup6.html

Browse files
Files changed (1) hide show
  1. index.backup6.html +459 -0
index.backup6.html ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Voice Chat Bot with Advanced Echo Cancellation</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
10
+
11
+
12
+ <style>
13
+ body {
14
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
15
+ margin: 0;
16
+ padding: 20px;
17
+ background-color: #1a1a1a;
18
+ color: #f0f0f0;
19
+ }
20
+ .container {
21
+ max-width: 800px;
22
+ margin: 0 auto;
23
+ }
24
+ h1 {
25
+ color: #ffd700;
26
+ text-align: center;
27
+ margin-bottom: 10px;
28
+ }
29
+ .subtitle {
30
+ text-align: center;
31
+ color: #ffd700;
32
+ margin-bottom: 20px;
33
+ }
34
+ #chat-container {
35
+ display: flex;
36
+ flex-direction: column;
37
+ height: 70vh;
38
+ }
39
+ #conversation {
40
+ flex-grow: 1;
41
+ border: 1px solid #444;
42
+ padding: 10px;
43
+ overflow-y: scroll;
44
+ background-color: #2a2a2a;
45
+ border-radius: 5px;
46
+ margin-bottom: 20px;
47
+ }
48
+ #controls {
49
+ display: flex;
50
+ justify-content: center;
51
+ margin-bottom: 20px;
52
+ }
53
+ button {
54
+ font-size: 18px;
55
+ padding: 10px 20px;
56
+ background-color: #ffd700;
57
+ color: #1a1a1a;
58
+ border: none;
59
+ border-radius: 5px;
60
+ cursor: pointer;
61
+ transition: background-color 0.3s;
62
+ }
63
+ button:hover {
64
+ background-color: #ffec8b;
65
+ }
66
+ button:disabled {
67
+ background-color: #666;
68
+ cursor: not-allowed;
69
+ }
70
+ #visualizer {
71
+ width: 100%;
72
+ height: 100px;
73
+ background-color: #2a2a2a;
74
+ border-radius: 5px;
75
+ overflow: hidden;
76
+ margin-bottom: 20px;
77
+ }
78
+ .bar {
79
+ width: 5px;
80
+ height: 100%;
81
+ background-color: #ffd700;
82
+ display: inline-block;
83
+ margin-right: 1px;
84
+ }
85
+ #loading {
86
+ position: fixed;
87
+ top: 0;
88
+ left: 0;
89
+ width: 100%;
90
+ height: 100%;
91
+ background-color: rgba(0, 0, 0, 0.8);
92
+ display: flex;
93
+ justify-content: center;
94
+ align-items: center;
95
+ z-index: 1000;
96
+ }
97
+ .spinner {
98
+ width: 50px;
99
+ height: 50px;
100
+ border: 5px solid #f3f3f3;
101
+ border-top: 5px solid #ffd700;
102
+ border-radius: 50%;
103
+ animation: spin 1s linear infinite;
104
+ }
105
+ @keyframes spin {
106
+ 0% { transform: rotate(0deg); }
107
+ 100% { transform: rotate(360deg); }
108
+ }
109
+ #configuration {
110
+ margin-bottom: 20px;
111
+ }
112
+ select {
113
+ width: 100%;
114
+ padding: 10px;
115
+ font-size: 16px;
116
+ background-color: #2a2a2a;
117
+ color: #f0f0f0;
118
+ border: 1px solid #444;
119
+ border-radius: 5px;
120
+ }
121
+ #model-info {
122
+ margin-top: 10px;
123
+ font-size: 14px;
124
+ color: #aaa;
125
+ }
126
+ #logs {
127
+ background-color: #2a2a2a;
128
+ border: 1px solid #444;
129
+ border-radius: 5px;
130
+ padding: 10px;
131
+ height: 200px;
132
+ overflow-y: scroll;
133
+ font-family: monospace;
134
+ font-size: 14px;
135
+ }
136
+ #clear-logs {
137
+ margin-top: 10px;
138
+ font-size: 14px;
139
+ padding: 5px 10px;
140
+ }
141
+ #localVideo, #remoteVideo {
142
+ display: none;
143
+ }
144
+ </style>
145
+ </head>
146
+ <body>
147
+ <div id="loading">
148
+ <div class="spinner"></div>
149
+ </div>
150
+ <div class="container">
151
+ <h1>Digital Human Voice Chat</h1>
152
+ <p class="subtitle">For best results, use headphones.</p>
153
+ <div id="chat-container">
154
+ <div id="controls">
155
+ <button id="startButton" disabled>Begin Call</button>
156
+ </div>
157
+ <div id="configuration">
158
+ <select id="configSelect">
159
+ <option value="fastest">Fastest</option>
160
+ <option value="balanced">Balanced</option>
161
+ <option value="quality">Highest Quality</option>
162
+ </select>
163
+ <div id="model-info">
164
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
165
+ </div>
166
+ </div>
167
+ <div id="visualizer"></div>
168
+ <div id="conversation"></div>
169
+ </div>
170
+ <h2>Logs</h2>
171
+ <div id="logs"></div>
172
+ <button id="clear-logs">Clear</button>
173
+ </div>
174
+ <video id="localVideo" autoplay></video>
175
+ <video id="remoteVideo" autoplay></video>
176
+
177
+ <script type="module">
178
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
179
+
180
+ env.localModelPath = './models';
181
+
182
+ //BELOW 5 statements added by RAHUL
183
+ // Configure environment before initializing pipelines
184
+ env.backends = ['wasm'];
185
+ env.wasm = env.wasm || {};
186
+ env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/'; // Ensure correct WASM paths
187
+ env.wasm.simd = true; // Enable SIMD if available
188
+ env.numThreads = navigator.hardwareConcurrency || 4; // Use available CPU cores
189
+
190
// --- DOM references ---
const conversationDiv = document.getElementById('conversation');
const startButton = document.getElementById('startButton');
const visualizer = document.getElementById('visualizer');
const loadingDiv = document.getElementById('loading');
const logsDiv = document.getElementById('logs');
const clearLogsButton = document.getElementById('clear-logs');
const localVideo = document.getElementById('localVideo');
const remoteVideo = document.getElementById('remoteVideo');

// --- Model pipelines (assigned by initializePipelines) ---
let sttPipeline;
let ttsPipeline;

// --- Audio graph / visualizer state ---
let audioContext;
let analyser;
let dataArray;
let bars;
let animationId; // setTimeout id driving the updateVisualizer loop

// --- Session state ---
let myvad;
let isListening = false;
let isSpeaking = false;
let microphoneStream;
let currentAudioSource = null;

// --- WebRTC loopback pair (echo-cancellation hack) ---
let rtcConnection = null;
let rtcLoopbackConnection = null;
let loopbackStream = new MediaStream();
214
+
215
// Build the audio visualizer: fill the container with fixed-width bars
// and cache the live HTMLCollection for updateVisualizer() to mutate.
function createVisualizer() {
    const BAR_COUNT = 64;
    const fragment = document.createDocumentFragment();
    let remaining = BAR_COUNT;
    while (remaining-- > 0) {
        const bar = document.createElement('div');
        bar.className = 'bar';
        fragment.appendChild(bar);
    }
    visualizer.appendChild(fragment);
    bars = visualizer.getElementsByClassName('bar');
}
224
+
225
// Refresh each bar's height from the analyser's current frequency
// data, then re-arm itself on a 50 ms timer (deliberately slower than
// requestAnimationFrame to reduce CPU use - RAHUL ATLURY).
function updateVisualizer() {
    analyser.getByteFrequencyData(dataArray);
    let i = 0;
    for (const bar of bars) {
        bar.style.height = (dataArray[i++] / 2) + 'px';
    }
    // The timer id is kept in animationId so the loop can be cancelled.
    animationId = setTimeout(updateVisualizer, 50);
}
236
+
237
+
238
// Load the speech-to-text and text-to-speech models in parallel, then
// unlock the "Begin Call" button. On failure, report to the log panel.
// The loading spinner is hidden on both paths.
async function initializePipelines() {
    try {
        const loaders = [
            pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
            pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
        ];
        [sttPipeline, ttsPipeline] = await Promise.all(loaders);

        addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
        startButton.disabled = false;
    } catch (error) {
        console.error('Error initializing pipelines:', error);
        addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
    } finally {
        loadingDiv.style.display = 'none';
    }
}
260
+
261
// Transcribe one VAD-captured utterance, build the bot reply, and play
// it back through the shared AudioContext. `audio` is the sample array
// delivered by MicVAD's onSpeechEnd callback.
// Fix: isSpeaking is now reset in a finally block, so an error thrown
// by the TTS pipeline or playback can no longer leave the interrupt
// flag stuck at true.
async function processSpeech(audio) {
    try {
        if (!sttPipeline || !ttsPipeline) {
            throw new Error('Pipelines not initialized');
        }

        const transcription = await sttPipeline(audio);
        addLog(`User: ${transcription.text}`);

        // Placeholder "LLM": echo the transcription back to the user.
        const botResponse = `I heard you say: "${transcription.text}".`;
        addLog(`Bot: ${botResponse}`);

        isSpeaking = true;
        try {
            const speechOutput = await ttsPipeline(botResponse);
            await playAudio(speechOutput.audio);
        } finally {
            isSpeaking = false;
        }
    } catch (error) {
        console.error('Error processing speech:', error);
        addLog('System: Error processing speech. Please try again.');
    }
}
282
+
283
// Append a timestamped line to the on-page log panel and keep it
// scrolled to the newest entry. Uses textContent, so `message` is
// rendered verbatim (no HTML injection).
function addLog(message) {
    const entry = document.createElement('div');
    entry.textContent = `[${new Date().toLocaleTimeString()}] ${message}`;
    logsDiv.appendChild(entry);
    logsDiv.scrollTop = logsDiv.scrollHeight;
}
292
+
293
// Play a mono Float32Array of PCM samples through the shared
// AudioContext, routed via the analyser so the visualizer reacts to
// bot speech. Resolves when playback ends, including when it is cut
// short by stopCurrentAudio() (stop() also fires onended).
// Generalized: the sample rate is now a parameter defaulting to
// 16000 Hz (the rate of the Xenova/mms-tts-eng output), so existing
// callers are unaffected while other sources become playable.
function playAudio(audioArray, sampleRate = 16000) {
    return new Promise((resolve) => {
        const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
        audioBuffer.getChannelData(0).set(audioArray);

        const source = audioContext.createBufferSource();
        currentAudioSource = source;
        source.buffer = audioBuffer;
        source.connect(analyser);
        analyser.connect(audioContext.destination);
        // Register onended before start() so an immediate stop still resolves.
        source.onended = () => {
            currentAudioSource = null;
            resolve();
        };
        source.start();
    });
}
311
+
312
// Abort bot playback, if any is in flight. Calling stop() on the
// source node fires its onended handler, which lets the pending
// playAudio() promise resolve.
function stopCurrentAudio() {
    if (!currentAudioSource) {
        return;
    }
    currentAudioSource.stop();
    currentAudioSource = null;
}
318
+
319
// Single entry point for the Begin Call / End Call button.
async function toggleListening() {
    const action = isListening ? stopListening : startListening;
    await action();
}
326
+
327
// Begin a listening session: build the analyser, grab the microphone
// (plus a 1x1 video track — presumably to coax the browser into
// stronger echo cancellation; confirm against target browsers), route
// the audio through a local WebRTC loopback pair for additional echo
// processing, and start voice-activity detection.
// Fix: onSpeechEnd now cancels the visualizer loop with clearTimeout —
// updateVisualizer schedules itself via setTimeout, so the previous
// cancelAnimationFrame(animationId) call silently did nothing and the
// loop kept running.
async function startListening() {
    try {
        audioContext = new (window.AudioContext || window.webkitAudioContext)();
        analyser = audioContext.createAnalyser();
        analyser.fftSize = 128; // 64 frequency bins — one per visualizer bar
        dataArray = new Uint8Array(analyser.frequencyBinCount);

        // Mute both hidden video elements so the loopback never becomes audible.
        localVideo.volume = 0;
        localVideo.muted = true;
        document.getElementById('localVideo').volume = 0;

        remoteVideo.volume = 0;
        remoteVideo.muted = true;
        document.getElementById('remoteVideo').volume = 0;

        // Request both audio and video streams
        microphoneStream = await navigator.mediaDevices.getUserMedia({
            audio: true,
            video: { width: 1, height: 1 } // Minimal video for echo cancellation
        });

        localVideo.srcObject = microphoneStream;
        await localVideo.play();

        console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
        console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());

        // Loopback hack: feed the mic through a local RTCPeerConnection
        // pair so the browser's AEC pipeline processes it.
        const offerOptions = {
            offerToReceiveAudio: true,
            offerToReceiveVideo: false,
        };

        rtcConnection = new RTCPeerConnection();
        rtcLoopbackConnection = new RTCPeerConnection();

        rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
        rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));

        rtcLoopbackConnection.ontrack = e => e.streams[0].getTracks().forEach(track => loopbackStream.addTrack(track));

        microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));

        const offer = await rtcConnection.createOffer(offerOptions);
        await rtcConnection.setLocalDescription(offer);
        await rtcLoopbackConnection.setRemoteDescription(offer);
        const answer = await rtcLoopbackConnection.createAnswer();
        await rtcLoopbackConnection.setLocalDescription(answer);
        await rtcConnection.setRemoteDescription(answer);

        // Use the loopback stream (not the raw mic) for audio processing.
        const source = audioContext.createMediaStreamSource(loopbackStream);
        source.connect(analyser);

        myvad = await vad.MicVAD.new({
            noiseSuppression: true,
            aggressiveness: 3, // Higher value = more aggressive speech detection
            onSpeechStart: () => {
                addLog('--- Voice activity: speech start');
                updateVisualizer();
                // Barge-in: user speech cuts off any in-flight bot audio.
                if (isSpeaking) {
                    addLog('User interrupted. Stopping bot speech.');
                    stopCurrentAudio();
                    isSpeaking = false;
                }
            },
            onSpeechEnd: (audio) => {
                addLog('--- Voice activity: speech end');
                // animationId is a setTimeout id — clearTimeout, not
                // cancelAnimationFrame, is what actually stops the loop.
                clearTimeout(animationId);
                processSpeech(audio);
            }
        });

        await myvad.start();
        startButton.textContent = 'End Call';
        isListening = true;
        addLog('System: Listening...');
    } catch (error) {
        console.error('Error starting voice activity:', error);
        addLog('System: Error starting voice detection. Please check your microphone and try again.');
    }
}
409
+
410
// Tear down the listening session: VAD, microphone tracks, the
// AudioContext, the hidden video elements, the WebRTC loopback pair,
// and any in-flight bot playback. Safe to call when partially set up —
// every resource is guarded and nulled after release.
// Fix: the visualizer loop is driven by setTimeout (see
// updateVisualizer), so clearTimeout is the call that actually stops
// it; the previous cancelAnimationFrame(animationId) was a no-op on a
// timeout id and left the loop running after the call ended.
async function stopListening() {
    if (myvad) {
        try {
            await myvad.destroy();
        } catch (error) {
            console.error('Error stopping voice activity:', error);
        }
        myvad = null;
    }
    if (microphoneStream) {
        microphoneStream.getTracks().forEach(track => track.stop());
        microphoneStream = null;
    }
    if (audioContext) {
        await audioContext.close();
        audioContext = null;
    }
    if (localVideo) {
        localVideo.srcObject = null;
    }
    if (remoteVideo) {
        remoteVideo.srcObject = null;
    }
    if (rtcConnection) {
        rtcConnection.close();
        rtcConnection = null;
    }
    if (rtcLoopbackConnection) {
        rtcLoopbackConnection.close();
        rtcLoopbackConnection = null;
    }
    // Drop the old loopback tracks; a fresh stream is built on the next call.
    loopbackStream = new MediaStream();
    stopCurrentAudio();
    startButton.textContent = 'Begin Call';
    isListening = false;
    addLog('System: Stopped listening.');
    clearTimeout(animationId); // stop the setTimeout-driven visualizer loop
    addLog('System: Microphone closed');
}
449
+
450
// Wire up the UI controls and kick off model loading.
const clearLogs = () => {
    logsDiv.innerHTML = '';
};

startButton.addEventListener('click', toggleListening);
clearLogsButton.addEventListener('click', clearLogs);

createVisualizer();
initializePipelines();
457
+ </script>
458
+ </body>
459
+ </html>