<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Voice Chat Bot</title>
    <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web/dist/bundle.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@xenova/transformers"></script>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #1a1a1a;
            color: #f0f0f0;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
        }
        h1 {
            color: #ffd700;
            text-align: center;
            margin-bottom: 10px;
        }
        .subtitle {
            text-align: center;
            color: #ffd700;
            margin-bottom: 20px;
        }
        #chat-container {
            display: flex;
            flex-direction: column;
            height: 70vh;
        }
        #conversation {
            flex-grow: 1;
            border: 1px solid #444;
            padding: 10px;
            overflow-y: scroll;
            background-color: #2a2a2a;
            border-radius: 5px;
            margin-bottom: 20px;
        }
        #controls {
            display: flex;
            justify-content: center;
            margin-bottom: 20px;
        }
        button {
            font-size: 18px;
            padding: 10px 20px;
            background-color: #ffd700;
            color: #1a1a1a;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            transition: background-color 0.3s;
        }
        button:hover {
            background-color: #ffec8b;
        }
        button:disabled {
            background-color: #666;
            cursor: not-allowed;
        }
        #visualizer {
            width: 100%;
            height: 100px;
            background-color: #2a2a2a;
            border-radius: 5px;
            overflow: hidden;
            margin-bottom: 20px;
        }
        .bar {
            width: 5px;
            height: 100%;
            background-color: #ffd700;
            display: inline-block;
            margin-right: 1px;
        }
        #loading {
            position: fixed;
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            background-color: rgba(0, 0, 0, 0.8);
            display: flex;
            justify-content: center;
            align-items: center;
            z-index: 1000;
        }
        .spinner {
            width: 50px;
            height: 50px;
            border: 5px solid #f3f3f3;
            border-top: 5px solid #ffd700;
            border-radius: 50%;
            animation: spin 1s linear infinite;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        #configuration {
            margin-bottom: 20px;
        }
        select {
            width: 100%;
            padding: 10px;
            font-size: 16px;
            background-color: #2a2a2a;
            color: #f0f0f0;
            border: 1px solid #444;
            border-radius: 5px;
        }
        #model-info {
            margin-top: 10px;
            font-size: 14px;
            color: #aaa;
        }
        #logs {
            background-color: #2a2a2a;
            border: 1px solid #444;
            border-radius: 5px;
            padding: 10px;
            height: 200px;
            overflow-y: scroll;
            font-family: monospace;
            font-size: 14px;
        }
        #clear-logs {
            margin-top: 10px;
            font-size: 14px;
            padding: 5px 10px;
        }
    </style>
</head>
<body>
    <div id="loading">
        <div class="spinner"></div>
    </div>
    <div class="container">
        <h1>Voice Chat Bot Demo</h1>
        <p class="subtitle">For best results, use headphones.</p>
        <div id="chat-container">
            <div id="controls">
                <button id="startButton" disabled>Begin Call</button>
            </div>
            <div id="configuration">
                <select id="configSelect">
                    <option value="fastest">Fastest</option>
                    <option value="balanced">Balanced</option>
                    <option value="quality">Highest Quality</option>
                </select>
                <div id="model-info">
                    TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
                </div>
            </div>
            <div id="visualizer"></div>
            <div id="conversation"></div>
        </div>
        <h2>Logs</h2>
        <div id="logs"></div>
        <button id="clear-logs">Clear</button>
    </div>
    <script type="module">
        import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';

        env.localModelPath = './models';

        const conversationDiv = document.getElementById('conversation');
        const startButton = document.getElementById('startButton');
        const visualizer = document.getElementById('visualizer');
        const loadingDiv = document.getElementById('loading');
        const logsDiv = document.getElementById('logs');
        const clearLogsButton = document.getElementById('clear-logs');

        let myvad;
        let sttPipeline;
        let ttsPipeline;
        let audioContext;
        let analyser;
        let dataArray;
        let bars;
        let animationId;
        let isListening = false;
        let microphoneStream;
        let isSpeaking = false;
        let currentAudioSource = null;

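        // Build the 64 bars that make up the audio visualizer.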
        function createVisualizer() {
            const barCount = 64;
            for (let i = 0; i < barCount; i++) {
                const bar = document.createElement('div');
                bar.className = 'bar';
                visualizer.appendChild(bar);
            }
            bars = visualizer.getElementsByClassName('bar');
        }

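        // Animation loop: scale each bar to the current value of its frequency bin.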
        function updateVisualizer() {
            analyser.getByteFrequencyData(dataArray);
            for (let i = 0; i < bars.length; i++) {
                const barHeight = dataArray[i] / 2;
                bars[i].style.height = barHeight + 'px';
            }
            animationId = requestAnimationFrame(updateVisualizer);
        }

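        // Load the speech-to-text (Whisper) and text-to-speech (MMS) pipelines.
        // The start button stays disabled until both models are ready.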
        async function initializePipelines() {
            try {
                sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
                ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', {
                    quantized: false,
                });
                addLog('System: Voice Chat Bot initialized. Click "Begin Call" to start.');
                startButton.disabled = false;
                loadingDiv.style.display = 'none';
            } catch (error) {
                console.error('Error initializing pipelines:', error);
                addLog('System: Error initializing Voice Chat Bot. Please check the console for details.');
                loadingDiv.style.display = 'none';
            }
        }

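        // Handle one VAD speech segment: transcribe it, build a reply, and speak the reply.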
        async function processSpeech(audio) {
            try {
                if (!sttPipeline || !ttsPipeline) {
                    throw new Error('Pipelines not initialized');
                }
                const transcription = await sttPipeline(audio);
                addLog(`User: ${transcription.text}`);

                // Placeholder response (no LLM yet): simply echo the transcription back.
                const botResponse = `I heard you say: "${transcription.text}".`;
                addLog(`Bot: ${botResponse}`);

                isSpeaking = true;
                const speechOutput = await ttsPipeline(botResponse);
                await playAudio(speechOutput.audio);
                isSpeaking = false;
            } catch (error) {
                console.error('Error processing speech:', error);
                addLog('System: Error processing speech. Please try again.');
            }
        }

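        // Append a timestamped message to the log panel and keep it scrolled to the bottom.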
        function addLog(message) {
            const now = new Date();
            const timestamp = now.toLocaleTimeString();
            const logMessage = `[${timestamp}] ${message}`;
            const messageElement = document.createElement('div');
            messageElement.textContent = logMessage;
            logsDiv.appendChild(messageElement);
            logsDiv.scrollTop = logsDiv.scrollHeight;
        }

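        // Play raw Float32 samples through the analyser so the visualizer reacts to the bot's voice.
        // The buffer is created at 16 kHz, matching the output rate of Xenova/mms-tts-eng.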
        function playAudio(audioArray) {
            return new Promise((resolve) => {
                const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
                const channelData = audioBuffer.getChannelData(0);
                channelData.set(audioArray);

                const source = audioContext.createBufferSource();
                currentAudioSource = source; // Store the current audio source
                source.buffer = audioBuffer;
                source.connect(analyser);
                analyser.connect(audioContext.destination);
                source.start();
                source.onended = () => {
                    currentAudioSource = null;
                    resolve();
                };
            });
        }

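        // Stop any bot speech that is still playing (used when the user barges in).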
        function stopCurrentAudio() {
            if (currentAudioSource) {
                currentAudioSource.stop();
                currentAudioSource = null;
            }
        }

        async function toggleListening() {
            if (isListening) {
                await stopListening();
            } else {
                await startListening();
            }
        }

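        // Set up the Web Audio graph, the MicVAD instance, and the microphone stream.
        // Echo cancellation is requested so the bot does not transcribe its own speech.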
        async function startListening() {
            try {
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                analyser = audioContext.createAnalyser();
                analyser.fftSize = 128;
                dataArray = new Uint8Array(analyser.frequencyBinCount);

                myvad = await vad.MicVAD.new({
                    onSpeechStart: () => {
                        addLog('--- vad: speech start');
                        updateVisualizer();
                        if (isSpeaking) {
                            addLog('User interrupted. Stopping bot speech.');
                            stopCurrentAudio();
                            isSpeaking = false;
                        }
                    },
                    onSpeechEnd: (audio) => {
                        addLog('--- vad: speech end');
                        cancelAnimationFrame(animationId);
                        processSpeech(audio);
                    }
                });

                // Enable echo cancellation in the audio input
                microphoneStream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        echoCancellation: true, // Enable echo cancellation
                        noiseSuppression: true, // Optional: enable noise suppression
                        autoGainControl: true,  // Optional: enable auto gain control
                        voiceIsolation: true    // Enable voice isolation if supported
                    }
                });

                console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
                console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());

                const supportedConstraints = navigator.mediaDevices.getSupportedConstraints();
                console.log('Supported constraints:', supportedConstraints);
                if (!supportedConstraints.echoCancellation) {
                    console.warn('Echo cancellation is not supported on this device or browser.');
                }

                const source = audioContext.createMediaStreamSource(microphoneStream);
                source.connect(analyser);

                await myvad.start();
                startButton.textContent = 'End Call';
                isListening = true;
                addLog('System: Listening...');
            } catch (error) {
                console.error('Error starting VAD:', error);
                addLog('System: Error starting voice detection. Please check your microphone and try again.');
            }
        }

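        // Tear down the VAD instance, microphone stream, and AudioContext, then reset the UI.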
        async function stopListening() {
            if (myvad) {
                try {
                    await myvad.destroy();
                } catch (error) {
                    console.error('Error stopping VAD:', error);
                }
                myvad = null;
            }
            if (microphoneStream) {
                microphoneStream.getTracks().forEach(track => track.stop());
                microphoneStream = null;
            }
            if (audioContext) {
                await audioContext.close();
                audioContext = null;
            }
            stopCurrentAudio();
            startButton.textContent = 'Begin Call';
            isListening = false;
            addLog('System: Stopped listening.');
            cancelAnimationFrame(animationId);
            addLog('System: Microphone closed');
        }

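        // Wire up the UI and start loading the models immediately.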
        startButton.addEventListener('click', toggleListening);
        clearLogsButton.addEventListener('click', () => {
            logsDiv.innerHTML = '';
        });

        createVisualizer();
        initializePipelines();
    </script>
</body>
</html>