Update index.backup5.html

index.backup5.html  CHANGED  (+34 -42)
@@ -161,7 +161,7 @@
             <option value="quality">Highest Quality</option>
         </select>
         <div id="model-info">
-            TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM:
+            TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
         </div>
     </div>
     <div id="visualizer"></div>
@@ -179,12 +179,13 @@
 
         env.localModelPath = './models';
 
+        //BELOW 5 statements added by RAHUL
         // Configure environment before initializing pipelines
         env.backends = ['wasm'];
         env.wasm = env.wasm || {};
-        env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
-        env.wasm.simd = true;
-        env.numThreads = navigator.hardwareConcurrency || 4;
+        env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/'; // Ensure correct WASM paths
+        env.wasm.simd = true; // Enable SIMD if available
+        env.numThreads = navigator.hardwareConcurrency || 4; // Use available CPU cores
 
         const conversationDiv = document.getElementById('conversation');
         const startButton = document.getElementById('startButton');
@@ -198,7 +199,6 @@
         let myvad;
         let sttPipeline;
         let ttsPipeline;
-        let llmPipeline;
         let audioContext;
         let analyser;
         let dataArray;
@@ -228,55 +228,46 @@
                 const barHeight = dataArray[i] / 2;
                 bars[i].style.height = barHeight + 'px';
             }
-            animationId = requestAnimationFrame(updateVisualizer);
+            // Use setTimeout instead of requestAnimationFrame to reduce update frequency - RAHUL ATLURY
+            animationId = setTimeout(updateVisualizer, 50); // Update every 50ms - RAHUL ATLURY
+
+            //animationId = requestAnimationFrame(updateVisualizer);
         }
 
+
         async function initializePipelines() {
             try {
-
-
-
-
-
+
+                //sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }); // added , { quantized: true }
+                //ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', {
+                //    quantized: true, //changed to true - RAHUL ATLURY
+                //});
+
+                [sttPipeline, ttsPipeline] = await Promise.all([
+                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
+                    pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true })
                 ]);
 
-                addLog('System: Digital Human Voice Chat initialized
+                addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
                 startButton.disabled = false;
                 loadingDiv.style.display = 'none';
             } catch (error) {
                 console.error('Error initializing pipelines:', error);
-                addLog(
+                addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
                 loadingDiv.style.display = 'none';
             }
         }
 
         async function processSpeech(audio) {
             try {
-                if (!sttPipeline || !ttsPipeline
+                if (!sttPipeline || !ttsPipeline) {
                     throw new Error('Pipelines not initialized');
                 }
 
                 const transcription = await sttPipeline(audio);
                 addLog(`User: ${transcription.text}`);
 
-                const messages = [
-                    { role: 'system', content: 'You are a helpful assistant.' },
-                    { role: 'user', content: transcription.text }
-                ];
-
-                // Apply chat template
-                const text = llmPipeline.tokenizer.apply_chat_template(messages, {
-                    tokenize: false,
-                    add_generation_prompt: true,
-                });
-
-                // Generate text
-                const llmResponse = await llmPipeline(text, {
-                    max_new_tokens: 128,
-                    do_sample: false
-                });
-
-                const botResponse = llmResponse[0].generated_text;
+                const botResponse = `I heard you say: "${transcription.text}".`;
                 addLog(`Bot: ${botResponse}`);
 
                 isSpeaking = true;
@@ -285,7 +276,7 @@
                 isSpeaking = false;
             } catch (error) {
                 console.error('Error processing speech:', error);
-                addLog(
+                addLog('System: Error processing speech. Please try again.');
             }
         }
 
@@ -348,9 +339,10 @@
                 remoteVideo.muted = true;
                 document.getElementById('remoteVideo').volume = 0;
 
+                // Request both audio and video streams
                 microphoneStream = await navigator.mediaDevices.getUserMedia({
                     audio: true,
-                    video: { width: 1, height: 1 }
+                    video: { width: 1, height: 1 } // Minimal video for echo cancellation
                 });
 
                 localVideo.srcObject = microphoneStream;
@@ -359,6 +351,7 @@
                 console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
                 console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
 
+                // Implement loopback hack for improved echo cancellation
                 const offerOptions = {
                     offerToReceiveAudio: true,
                     offerToReceiveVideo: false,
@@ -381,12 +374,13 @@
                 await rtcLoopbackConnection.setLocalDescription(answer);
                 await rtcConnection.setRemoteDescription(answer);
 
+                // Use the loopback stream for audio processing
                 const source = audioContext.createMediaStreamSource(loopbackStream);
                 source.connect(analyser);
 
                 myvad = await vad.MicVAD.new({
-
-
+                    noiseSuppression: true, ///Added by RAHUL Atlury
+                    aggressiveness: 3, // Higher value for more aggressive detection Added by RAHUL ATLURY
                     onSpeechStart: () => {
                         addLog('--- Voice activity: speech start');
                         updateVisualizer();
@@ -404,12 +398,12 @@
                 });
 
                 await myvad.start();
-
+                startButton.textContent = 'End Call';
                 isListening = true;
                 addLog('System: Listening...');
             } catch (error) {
                 console.error('Error starting voice activity:', error);
-                addLog(
+                addLog('System: Error starting voice detection. Please check your microphone and try again.');
             }
         }
 
@@ -451,7 +445,7 @@
             addLog('System: Stopped listening.');
             cancelAnimationFrame(animationId);
             addLog('System: Microphone closed');
-
+        }
 
         startButton.addEventListener('click', toggleListening);
         clearLogsButton.addEventListener('click', () => {
@@ -462,6 +456,4 @@
             initializePipelines();
         </script>
     </body>
-</html>
-
-
+</html>
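
The central change above drops the LLM pipeline in favour of a canned echo response and loads the two remaining models in parallel. Below is a minimal stand-alone sketch of that loading pattern, assuming the @xenova/transformers ESM build from jsDelivr (the pinned version in the diff's own URL is obscured, so the unpinned URL here is an assumption); the model names and the { quantized: true } option are taken from the added lines.

    // Sketch only: parallel pipeline loading as adopted in initializePipelines() above.
    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';

    let sttPipeline, ttsPipeline;

    async function initializePipelines() {
        // Load speech-to-text and text-to-speech in parallel rather than sequentially.
        [sttPipeline, ttsPipeline] = await Promise.all([
            pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
            pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
        ]);
    }

    await initializePipelines();
    // Later, e.g. from the VAD callback:
    //   const { text } = await sttPipeline(audioFloat32Array);
    //   const speech = await ttsPipeline(text);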
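
The visualizer hunk swaps requestAnimationFrame for a 50 ms setTimeout, while the shutdown path further down still calls cancelAnimationFrame(animationId). As a hedged aside, a handle returned by setTimeout is normally cleared with clearTimeout; the sketch below uses placeholder names (drawFrame, stopVisualizer) that are not in the file and only illustrates pairing the throttled loop with the matching cleanup call.

    // Sketch of a throttled visualizer loop; drawFrame/stopVisualizer are illustrative names.
    let animationId = null;

    function drawFrame() {
        // placeholder for reading the analyser and resizing the bars
    }

    function updateVisualizer() {
        drawFrame();
        animationId = setTimeout(updateVisualizer, 50); // ~20 updates per second
    }

    function stopVisualizer() {
        clearTimeout(animationId); // setTimeout handles are cleared with clearTimeout,
        animationId = null;        // not cancelAnimationFrame
    }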
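
The voice-activity hunk passes noiseSuppression and aggressiveness into vad.MicVAD.new alongside the speech callbacks. The sketch below shows only the callback wiring for the @ricky0123/vad-web style API that the global vad object suggests; whether the two extra options are honoured by the library is not verified here, and startVoiceDetection/handleUtterance are hypothetical names.

    // Sketch only: assumes the @ricky0123/vad-web browser bundle is loaded and exposed as `vad`.
    async function startVoiceDetection(handleUtterance) {
        const myvad = await vad.MicVAD.new({
            onSpeechStart: () => {
                console.log('--- Voice activity: speech start');
            },
            onSpeechEnd: (audio) => {
                // `audio` is a Float32Array containing the captured utterance
                console.log('--- Voice activity: speech end');
                handleUtterance(audio);
            },
        });
        await myvad.start();
        return myvad;
    }

    // Usage: const myvad = await startVoiceDetection(processSpeech);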