<!--
digitalhuman / index.backup3.html
atlury's picture
Rename index.html to index.backup3.html
645df8b verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Voice Chat Bot</title>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
<style>
/* Dark theme: near-black surfaces, light text, gold (#ffd700) accents. */
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 20px;
background-color: #1a1a1a;
color: #f0f0f0;
}
/* Centered page column. */
.container {
max-width: 800px;
margin: 0 auto;
}
h1 {
color: #ffd700;
text-align: center;
margin-bottom: 10px;
}
.subtitle {
text-align: center;
color: #ffd700;
margin-bottom: 20px;
}
/* Chat area: conversation pane grows to fill the 70vh column. */
#chat-container {
display: flex;
flex-direction: column;
height: 70vh;
}
#conversation {
flex-grow: 1;
border: 1px solid #444;
padding: 10px;
overflow-y: scroll;
background-color: #2a2a2a;
border-radius: 5px;
margin-bottom: 20px;
}
#controls {
display: flex;
justify-content: center;
margin-bottom: 20px;
}
button {
font-size: 18px;
padding: 10px 20px;
background-color: #ffd700;
color: #1a1a1a;
border: none;
border-radius: 5px;
cursor: pointer;
transition: background-color 0.3s;
}
button:hover {
background-color: #ffec8b;
}
button:disabled {
background-color: #666;
cursor: not-allowed;
}
/* Audio-level visualizer: a strip of vertical bars driven by JS. */
#visualizer {
width: 100%;
height: 100px;
background-color: #2a2a2a;
border-radius: 5px;
overflow: hidden;
margin-bottom: 20px;
}
.bar {
width: 5px;
height: 100%;
background-color: #ffd700;
display: inline-block;
margin-right: 1px;
}
/* Full-screen overlay shown while the ML models load. */
#loading {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.8);
display: flex;
justify-content: center;
align-items: center;
z-index: 1000;
}
.spinner {
width: 50px;
height: 50px;
border: 5px solid #f3f3f3;
border-top: 5px solid #ffd700;
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Quality-preset dropdown and model description. */
#configuration {
margin-bottom: 20px;
}
select {
width: 100%;
padding: 10px;
font-size: 16px;
background-color: #2a2a2a;
color: #f0f0f0;
border: 1px solid #444;
border-radius: 5px;
}
#model-info {
margin-top: 10px;
font-size: 14px;
color: #aaa;
}
/* Scrolling monospace log panel appended to by addLog(). */
#logs {
background-color: #2a2a2a;
border: 1px solid #444;
border-radius: 5px;
padding: 10px;
height: 200px;
overflow-y: scroll;
font-family: monospace;
font-size: 14px;
}
#clear-logs {
margin-top: 10px;
font-size: 14px;
padding: 5px 10px;
}
</style>
</head>
<body>
<!-- Overlay with spinner, hidden by the script once pipelines finish loading. -->
<div id="loading">
<div class="spinner"></div>
</div>
<div class="container">
<h1>Voice Chat Bot Demo</h1>
<p class="subtitle">For best results, use headphones.</p>
<div id="chat-container">
<!-- Start/stop button; enabled only after model initialization succeeds. -->
<div id="controls">
<button id="startButton" disabled>Begin Call</button>
</div>
<!-- NOTE(review): this select is not read anywhere in the script yet. -->
<div id="configuration">
<select id="configSelect">
<option value="fastest">Fastest</option>
<option value="balanced">Balanced</option>
<option value="quality">Highest Quality</option>
</select>
<div id="model-info">
TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
</div>
</div>
<!-- Bars are injected here by createVisualizer(). -->
<div id="visualizer"></div>
<!-- NOTE(review): never written to by the script; logs go to #logs below. -->
<div id="conversation"></div>
</div>
<h2>Logs</h2>
<div id="logs"></div>
<button id="clear-logs">Clear</button>
</div>
<script type="module">
// NOTE(review): transformers.js is also loaded via a <script> tag in <head>;
// this ES-module import is the one actually used — consider dropping the duplicate.
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
// Check ./models for local model files (transformers.js env setting).
env.localModelPath = './models';
// Cached references to the DOM elements the script drives.
const conversationDiv = document.getElementById('conversation');
const startButton = document.getElementById('startButton');
const visualizer = document.getElementById('visualizer');
const loadingDiv = document.getElementById('loading');
const logsDiv = document.getElementById('logs');
const clearLogsButton = document.getElementById('clear-logs');
// Mutable app state shared by the handlers below.
let myvad; // vad.MicVAD instance while a call is active
let sttPipeline; // speech-to-text pipeline (whisper-tiny.en)
let ttsPipeline; // text-to-speech pipeline (mms-tts-eng)
let audioContext; // Web Audio context for playback + visualization
let analyser; // AnalyserNode feeding the visualizer bars
let dataArray; // frequency-bin buffer reused every animation frame
let bars; // live HTMLCollection of .bar elements
let animationId; // requestAnimationFrame handle for the visualizer loop
let isListening = false; // true while the mic/VAD are running
let microphoneStream; // MediaStream from getUserMedia
let isSpeaking = false; // true while bot TTS audio is playing
let currentAudioSource = null; // playing AudioBufferSourceNode, for barge-in stop
function createVisualizer() {
// Fill the visualizer strip with 64 bars and cache the live collection
// so updateVisualizer() can resize them every frame.
const totalBars = 64;
for (let index = 0; index < totalBars; index++) {
const barElement = document.createElement('div');
barElement.className = 'bar';
visualizer.appendChild(barElement);
}
bars = visualizer.getElementsByClassName('bar');
}
function updateVisualizer() {
// Grab the latest frequency snapshot, scale each bar to half the bin value
// (bins are 0-255, bars max out at ~127px), then re-arm for the next frame.
analyser.getByteFrequencyData(dataArray);
Array.prototype.forEach.call(bars, (bar, index) => {
bar.style.height = `${dataArray[index] / 2}px`;
});
animationId = requestAnimationFrame(updateVisualizer);
}
async function initializePipelines() {
// Load the STT and TTS models, then enable the UI.
// The two model downloads are independent, so run them in parallel
// (the original awaited them serially, doubling perceived load time).
try {
[sttPipeline, ttsPipeline] = await Promise.all([
pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'),
pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: false }),
]);
addLog('System: Voice Chat Bot initialized. Click "Begin Call" to start.');
startButton.disabled = false;
} catch (error) {
console.error('Error initializing pipelines:', error);
addLog('System: Error initializing Voice Chat Bot. Please check the console for details.');
} finally {
// Hide the loading overlay on both success and failure paths.
loadingDiv.style.display = 'none';
}
}
async function processSpeech(audio) {
// Transcribe a VAD-captured utterance, build the placeholder echo response,
// synthesize it, and play it back.
try {
if (!sttPipeline || !ttsPipeline) {
throw new Error('Pipelines not initialized');
}
const transcription = await sttPipeline(audio);
addLog(`User: ${transcription.text}`);
const botResponse = `I heard you say: "${transcription.text}".`;
addLog(`Bot: ${botResponse}`);
isSpeaking = true;
try {
const speechOutput = await ttsPipeline(botResponse);
await playAudio(speechOutput.audio);
} finally {
// BUG FIX: if TTS or playback threw, the original left isSpeaking stuck
// at true, so every later VAD speech-start tried to cancel phantom audio.
isSpeaking = false;
}
} catch (error) {
console.error('Error processing speech:', error);
addLog('System: Error processing speech. Please try again.');
}
}
function addLog(message) {
// Append a timestamped entry to the log panel and keep it pinned to the bottom.
const entry = document.createElement('div');
entry.textContent = `[${new Date().toLocaleTimeString()}] ${message}`;
logsDiv.appendChild(entry);
logsDiv.scrollTop = logsDiv.scrollHeight;
}
function playAudio(audioArray) {
// Play a mono Float32 PCM buffer (16 kHz sample rate, matching mms-tts-eng
// output) and resolve when playback ends. onended also fires after a manual
// source.stop(), so stopCurrentAudio() still resolves this promise.
return new Promise((resolve) => {
const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
audioBuffer.getChannelData(0).set(audioArray);
const source = audioContext.createBufferSource();
currentAudioSource = source; // remembered so a user barge-in can stop it
source.buffer = audioBuffer;
// BUG FIX: the original did analyser.connect(destination), which also routed
// the microphone (connected to the analyser in startListening) out to the
// speakers — a feedback/echo path. Feed the analyser and the speakers
// directly from the source instead.
source.connect(analyser);
source.connect(audioContext.destination);
// Attach onended before start() so even a zero-length buffer cannot finish
// before the handler exists.
source.onended = () => {
currentAudioSource = null;
resolve();
};
source.start();
});
}
function stopCurrentAudio() {
// Halt any bot speech currently playing; no-op when nothing is playing.
if (!currentAudioSource) return;
currentAudioSource.stop();
currentAudioSource = null;
}
async function toggleListening() {
// Button handler: flip between the listening and idle states.
await (isListening ? stopListening() : startListening());
}
async function startListening() {
// Build the audio graph, create the VAD, open the microphone, and start
// listening. On any failure, releases whatever was partially acquired.
try {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
analyser.fftSize = 128; // 64 frequency bins — one per visualizer bar
dataArray = new Uint8Array(analyser.frequencyBinCount);
myvad = await vad.MicVAD.new({
onSpeechStart: () => {
addLog('--- vad: speech start');
updateVisualizer();
// Barge-in: if the bot is talking, cut it off so the user can interrupt.
if (isSpeaking) {
addLog('User interrupted. Stopping bot speech.');
stopCurrentAudio();
isSpeaking = false;
}
},
onSpeechEnd: (audio) => {
addLog('--- vad: speech end');
cancelAnimationFrame(animationId);
// Fire-and-forget: processSpeech handles its own errors.
processSpeech(audio);
}
});
// Request a processed mic signal to reduce the bot hearing its own output.
microphoneStream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
voiceIsolation: true // non-standard constraint; ignored where unsupported
}
});
console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
const supportedConstraints = navigator.mediaDevices.getSupportedConstraints();
console.log('Supported constraints:', supportedConstraints);
if (!supportedConstraints.echoCancellation) {
console.warn('Echo cancellation is not supported on this device or browser.');
}
// Feed the mic into the analyser so the visualizer reacts to user speech.
const source = audioContext.createMediaStreamSource(microphoneStream);
source.connect(analyser);
await myvad.start();
startButton.textContent = 'End Call';
isListening = true;
addLog('System: Listening...');
} catch (error) {
console.error('Error starting VAD:', error);
addLog('System: Error starting voice detection. Please check your microphone and try again.');
// BUG FIX: the original leaked the AudioContext and mic stream when setup
// failed partway through; release them so a retry starts clean.
if (microphoneStream) {
microphoneStream.getTracks().forEach((track) => track.stop());
microphoneStream = null;
}
if (audioContext) {
await audioContext.close().catch(() => {});
audioContext = null;
}
}
}
async function stopListening() {
// Tear everything down in acquisition order — VAD, mic tracks, audio
// graph — then restore the idle UI state.
if (myvad) {
try {
await myvad.destroy();
} catch (error) {
console.error('Error stopping VAD:', error);
}
myvad = null;
}
microphoneStream?.getTracks().forEach((track) => track.stop());
microphoneStream = null;
if (audioContext) {
await audioContext.close();
audioContext = null;
}
stopCurrentAudio();
startButton.textContent = 'Begin Call';
isListening = false;
addLog('System: Stopped listening.');
cancelAnimationFrame(animationId);
addLog('System: Microphone closed');
}
// Wire up the UI controls and kick off model loading immediately.
startButton.addEventListener('click', toggleListening);
clearLogsButton.addEventListener('click', () => {
logsDiv.innerHTML = '';
});
createVisualizer();
initializePipelines();
</script>
</body>
</html>