Spaces:

atlury
/

digitalhuman

Running

File size: 4,992 Bytes

0f941de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8e33ed
eaeaa29
 
 
0f941de
 
 
 
 
 
 
 
 
 
eaeaa29
 
eb067a7
 
 
eaeaa29
 
 
 
 
0f941de
 
 
 
eaeaa29
 
 
 
0f941de
 
 
 
 
 
 
eb067a7
0f941de
 
 
eaeaa29
0f941de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eaeaa29
0f941de
 
 
 
 
eaeaa29
0f941de
 
 
 
 
 
 
 
 
 
eaeaa29
0f941de
 
59d50fc

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Voice Chat Bot</title>
    <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
    <style>
        body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
        button { font-size: 18px; padding: 10px 20px; margin: 10px 0; }
        #conversation { border: 1px solid #ccc; padding: 10px; height: 300px; overflow-y: scroll; margin-bottom: 10px; }
    </style>
</head>
<body>
    <h1>Voice Chat Bot</h1>
    <div id="conversation"></div>
    <button id="startButton">Start Listening</button>
    <button id="stopButton" disabled>Stop Listening</button>

    <script type="module">
        import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

        // Point transformers.js at './models' for locally hosted model files.
        // NOTE(review): `env.localModelPath` appears to be the location local models
        // are loaded from rather than a download cache directory — confirm against
        // the transformers.js `env` documentation before relying on it for caching.
        env.localModelPath = './models';

        // DOM handles for the transcript area and the two control buttons.
        const conversationDiv = document.getElementById('conversation');
        const startButton = document.getElementById('startButton');
        const stopButton = document.getElementById('stopButton');

        // Shared state: `myvad` is assigned in startListening(); the two pipelines
        // are assigned in initializePipelines() and read by processSpeech().
        let myvad;
        let sttPipeline;
        let ttsPipeline;

        /**
         * Load the speech-to-text and text-to-speech pipelines and store them in
         * the shared `sttPipeline` / `ttsPipeline` variables.
         *
         * The two loads do not depend on each other, so both are started before
         * either is awaited instead of running back to back. The shared variables
         * are only assigned once both loads have succeeded.
         *
         * A "System" line is added to the conversation on success or failure.
         * Errors are logged to the console and are not rethrown.
         *
         * @returns {Promise<void>} Settles after loading has succeeded or failed.
         */
        async function initializePipelines() {
            try {
                const [speechToText, textToSpeech] = await Promise.all([
                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'),
                    pipeline('text-to-speech', 'Xenova/mms-tts-eng', {
                        quantized: false,
                    }),
                ]);
                sttPipeline = speechToText;
                ttsPipeline = textToSpeech;
                addMessage('System', 'Voice Chat Bot initialized. Click "Start Listening" to begin.');
            } catch (error) {
                console.error('Error initializing pipelines:', error);
                addMessage('System', 'Error initializing Voice Chat Bot. Please check the console for details.');
            }
        }

        /**
         * Handle one captured utterance: transcribe it, show the transcript and a
         * placeholder bot reply in the conversation, then synthesize and play the
         * reply.
         *
         * The synthesized result's `sampling_rate` is forwarded to playAudio()
         * alongside the samples so playback does not have to assume a rate.
         * NOTE(review): `sampling_rate` is the field name documented for the
         * transformers.js text-to-speech output — confirm for the pinned version.
         *
         * Any failure (including pipelines that are not loaded yet) is logged and
         * reported as a "System" message; nothing is rethrown.
         *
         * @param {*} audio Captured audio, passed unchanged to the speech-to-text pipeline.
         * @returns {Promise<void>}
         */
        async function processSpeech(audio) {
            try {
                if (!sttPipeline || !ttsPipeline) {
                    throw new Error('Pipelines not initialized');
                }

                const transcription = await sttPipeline(audio);
                addMessage('User', transcription.text);

                // Placeholder for LLM response
                const botResponse = `I heard you say: "${transcription.text}". This is a placeholder response.`;
                addMessage('Bot', botResponse);

                const speechOutput = await ttsPipeline(botResponse);
                playAudio(speechOutput.audio, speechOutput.sampling_rate);
            } catch (error) {
                console.error('Error processing speech:', error);
                addMessage('System', 'Error processing speech. Please try again.');
            }
        }

        /**
         * Append one line to the conversation panel and scroll it into view.
         *
         * The line is rendered as a bold "sender:" label followed by a space and
         * the message. Both values are inserted as plain text (never parsed as
         * HTML), because `message` can carry transcribed speech and must not be
         * able to inject markup into the page.
         *
         * @param {string} sender Label shown in bold before the message.
         * @param {string} message Text shown after the label.
         */
        function addMessage(sender, message) {
            const senderLabel = document.createElement('strong');
            senderLabel.textContent = `${sender}:`;

            const messageElement = document.createElement('p');
            messageElement.appendChild(senderLabel);
            messageElement.appendChild(document.createTextNode(` ${message}`));

            conversationDiv.appendChild(messageElement);
            // Keep the newest message visible.
            conversationDiv.scrollTop = conversationDiv.scrollHeight;
        }

        /**
         * Play a mono block of audio samples through the default output.
         *
         * Each call creates its own AudioContext. The context is closed once the
         * clip has finished so that contexts do not pile up over a conversation.
         *
         * @param {Float32Array|number[]} audioArray Samples for the single channel.
         * @param {number} [sampleRate=16000] Sample rate of `audioArray` in Hz.
         *     Defaults to the rate this function previously hard-coded.
         */
        function playAudio(audioArray, sampleRate = 16000) {
            // Nothing to play; avoids asking for a zero-length buffer.
            if (audioArray.length === 0) {
                return;
            }

            const AudioContextClass = window.AudioContext || window.webkitAudioContext;
            const audioContext = new AudioContextClass();
            const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
            audioBuffer.getChannelData(0).set(audioArray);

            const source = audioContext.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(audioContext.destination);
            // Release the context when playback finishes.
            source.onended = () => {
                audioContext.close().catch((error) => {
                    console.error('Error closing audio context:', error);
                });
            };
            source.start();
        }

        /**
         * Start (or resume) microphone voice-activity detection.
         *
         * The detector is created on the first call and stored in `myvad`; later
         * calls reuse that instance instead of constructing another one, so
         * toggling stop/start does not leave earlier detectors behind.
         *
         * The start button is disabled before any asynchronous work begins so a
         * second click cannot kick off a parallel setup, and it is re-enabled if
         * setup fails. Failures are logged and reported as a "System" message.
         *
         * @returns {Promise<void>}
         */
        async function startListening() {
            startButton.disabled = true;
            try {
                if (!myvad) {
                    myvad = await vad.MicVAD.new({
                        onSpeechEnd: (audio) => {
                            // processSpeech() reports its own errors.
                            processSpeech(audio);
                        }
                    });
                }
                await myvad.start();
                stopButton.disabled = false;
                addMessage('System', 'Listening...');
            } catch (error) {
                // Let the user retry.
                startButton.disabled = false;
                console.error('Error starting VAD:', error);
                addMessage('System', 'Error starting voice detection. Please check your microphone and try again.');
            }
        }

        /**
         * Pause voice detection and flip the buttons back to the idle state.
         * Does nothing when no detector has been created yet.
         */
        function stopListening() {
            if (!myvad) {
                return;
            }

            myvad.pause();
            stopButton.disabled = true;
            startButton.disabled = false;
            addMessage('System', 'Stopped listening.');
        }

        // Wire the two buttons to their handlers.
        startButton.addEventListener('click', startListening);
        stopButton.addEventListener('click', stopListening);

        // Begin loading the models as soon as this module script runs. The returned
        // promise is intentionally not awaited: initializePipelines() catches its
        // own errors and reports them in the conversation panel.
        initializePipelines();
    </script>
</body>
</html>