atlury committed
Commit 08925c7 · verified · Parent: 1b06c14

Update index.backup5.html

Files changed (1): index.backup5.html (+34 -42)
index.backup5.html CHANGED
@@ -161,7 +161,7 @@
   <option value="quality">Highest Quality</option>
   </select>
   <div id="model-info">
- TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/Qwen1.5-0.5B-Chat
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
   </div>
   </div>
   <div id="visualizer"></div>
@@ -179,12 +179,13 @@

   env.localModelPath = './models';

+ //BELOW 5 statements added by RAHUL
   // Configure environment before initializing pipelines
   env.backends = ['wasm'];
   env.wasm = env.wasm || {};
- env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
- env.wasm.simd = true;
- env.numThreads = navigator.hardwareConcurrency || 4;
+ env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/'; // Ensure correct WASM paths
+ env.wasm.simd = true; // Enable SIMD if available
+ env.numThreads = navigator.hardwareConcurrency || 4; // Use available CPU cores

   const conversationDiv = document.getElementById('conversation');
   const startButton = document.getElementById('startButton');
@@ -198,7 +199,6 @@
   let myvad;
   let sttPipeline;
   let ttsPipeline;
- let llmPipeline;
   let audioContext;
   let analyser;
   let dataArray;
@@ -228,55 +228,46 @@
   const barHeight = dataArray[i] / 2;
   bars[i].style.height = barHeight + 'px';
   }
- animationId = setTimeout(updateVisualizer, 50);
+ // Use setTimeout instead of requestAnimationFrame to reduce update frequency - RAHUL ATLURY
+ animationId = setTimeout(updateVisualizer, 50); // Update every 50ms - RAHUL ATLURY
+
+ //animationId = requestAnimationFrame(updateVisualizer);
   }

+
   async function initializePipelines() {
   try {
- addLog('System: Initializing pipelines...');
- [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
- pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
- pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
- pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat', { quantized: true })
+
+ //sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }); // added , { quantized: true }
+ //ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', {
+ // quantized: true, //changed to true - RAHUL ATLURY
+ //});
+
+ [sttPipeline, ttsPipeline] = await Promise.all([
+ pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
+ pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true })
   ]);

- addLog('System: Digital Human Voice Chat initialized with Qwen1.5-0.5B-Chat. Click "Begin Call" to start.');
+ addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
   startButton.disabled = false;
   loadingDiv.style.display = 'none';
   } catch (error) {
   console.error('Error initializing pipelines:', error);
- addLog(`System: Error initializing pipelines: ${error.message}`);
+ addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
   loadingDiv.style.display = 'none';
   }
   }

   async function processSpeech(audio) {
   try {
- if (!sttPipeline || !ttsPipeline || !llmPipeline) {
+ if (!sttPipeline || !ttsPipeline) {
   throw new Error('Pipelines not initialized');
   }

   const transcription = await sttPipeline(audio);
   addLog(`User: ${transcription.text}`);

- const messages = [
- { role: 'system', content: 'You are a helpful assistant.' },
- { role: 'user', content: transcription.text }
- ];
-
- // Apply chat template
- const text = llmPipeline.tokenizer.apply_chat_template(messages, {
- tokenize: false,
- add_generation_prompt: true,
- });
-
- // Generate text
- const llmResponse = await llmPipeline(text, {
- max_new_tokens: 128,
- do_sample: false
- });
-
- const botResponse = llmResponse[0].generated_text;
+ const botResponse = `I heard you say: "${transcription.text}".`;
   addLog(`Bot: ${botResponse}`);

   isSpeaking = true;
@@ -285,7 +276,7 @@
   isSpeaking = false;
   } catch (error) {
   console.error('Error processing speech:', error);
- addLog(`System: Error processing speech: ${error.message}`);
+ addLog('System: Error processing speech. Please try again.');
   }
   }

@@ -348,9 +339,10 @@
   remoteVideo.muted = true;
   document.getElementById('remoteVideo').volume = 0;

+ // Request both audio and video streams
   microphoneStream = await navigator.mediaDevices.getUserMedia({
   audio: true,
- video: { width: 1, height: 1 }
+ video: { width: 1, height: 1 } // Minimal video for echo cancellation
   });

   localVideo.srcObject = microphoneStream;
@@ -359,6 +351,7 @@
   console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
   console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());

+ // Implement loopback hack for improved echo cancellation
   const offerOptions = {
   offerToReceiveAudio: true,
   offerToReceiveVideo: false,
@@ -381,12 +374,13 @@
   await rtcLoopbackConnection.setLocalDescription(answer);
   await rtcConnection.setRemoteDescription(answer);

+ // Use the loopback stream for audio processing
   const source = audioContext.createMediaStreamSource(loopbackStream);
   source.connect(analyser);

   myvad = await vad.MicVAD.new({
- noiseSuppression: true,
- aggressiveness: 3,
+ noiseSuppression: true, ///Added by RAHUL Atlury
+ aggressiveness: 3, // Higher value for more aggressive detection Added by RAHUL ATLURY
   onSpeechStart: () => {
   addLog('--- Voice activity: speech start');
   updateVisualizer();
@@ -404,12 +398,12 @@
   });

   await myvad.start();
- startButton.textContent = 'End Call';
+ startButton.textContent = 'End Call';
   isListening = true;
   addLog('System: Listening...');
   } catch (error) {
   console.error('Error starting voice activity:', error);
- addLog(`System: Error starting voice detection: ${error.message}`);
+ addLog('System: Error starting voice detection. Please check your microphone and try again.');
   }
   }

@@ -451,7 +445,7 @@
   addLog('System: Stopped listening.');
   cancelAnimationFrame(animationId);
   addLog('System: Microphone closed');
- }
+ }

   startButton.addEventListener('click', toggleListening);
   clearLogsButton.addEventListener('click', () => {
@@ -462,6 +456,4 @@
   initializePipelines();
   </script>
   </body>
- </html>
-
-
+ </html>
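
A note on the environment block this commit keeps: in @xenova/transformers v2 the WASM options are normally set under env.backends.onnx.wasm (the embedded ONNX Runtime Web config) rather than on env directly, and env.backends = ['wasm'] overwrites that backend registry with a plain array. A minimal sketch of the documented shape, assuming the v2 API (the package version in the CDN URL above is elided, so the wasmPaths value here is a placeholder):

// Sketch only: documented-style WASM configuration for @xenova/transformers v2.
import { env } from '@xenova/transformers';

env.localModelPath = './models';   // serve model weights from the app, as the page does
env.backends.onnx.wasm.wasmPaths = '/wasm/';  // placeholder; point at the runtime's .wasm files
env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;
env.backends.onnx.wasm.simd = true;           // use the SIMD build when the browser supports it

// Caveat: numThreads > 1 only takes effect on cross-origin-isolated pages
// (COOP/COEP response headers); otherwise ONNX Runtime falls back to one thread.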
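
For reference, the flow the commit lands on (load both pipelines in parallel, then answer each utterance with a canned echo) reduces to roughly this sketch; respond() is a hypothetical stand-in for the page's processSpeech():

// Sketch of the load-and-respond flow; model ids and options are the ones in the diff.
import { pipeline } from '@xenova/transformers';

const [sttPipeline, ttsPipeline] = await Promise.all([
  pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
  pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
]);

async function respond(audio) {                  // audio: Float32Array from the VAD
  const { text } = await sttPipeline(audio);     // speech -> text
  const reply = `I heard you say: "${text}".`;   // placeholder reply, as in the diff
  return await ttsPipeline(reply);               // -> { audio: Float32Array, sampling_rate }
}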
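
The "loopback hack" the new comments refer to is a known workaround: browsers apply echo cancellation to audio that arrives over an RTCPeerConnection, so routing the microphone through a local pair of peer connections lets the AEC account for the TTS playback. Stripped of the page's plumbing, the pattern is approximately:

// Sketch: loop a MediaStream through two local peer connections so analysis
// happens on the echo-cancelled side.
async function createLoopbackStream(inputStream) {
  const a = new RTCPeerConnection();
  const b = new RTCPeerConnection();
  a.onicecandidate = (e) => e.candidate && b.addIceCandidate(e.candidate);
  b.onicecandidate = (e) => e.candidate && a.addIceCandidate(e.candidate);

  const loopback = new Promise((resolve) => {
    b.ontrack = (e) => resolve(e.streams[0]);    // stream on the far side of the loop
  });

  inputStream.getTracks().forEach((t) => a.addTrack(t, inputStream));
  const offer = await a.createOffer();
  await a.setLocalDescription(offer);
  await b.setRemoteDescription(offer);
  const answer = await b.createAnswer();
  await b.setLocalDescription(answer);
  await a.setRemoteDescription(answer);
  return loopback;
}

The page then hands the looped-back stream to audioContext.createMediaStreamSource() for the analyser, which matches the loopbackStream usage visible in the diff.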
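
On the two options added to vad.MicVAD.new(): the call shape matches @ricky0123/vad-web, and noiseSuppression and aggressiveness do not appear to be among that library's documented options, so they may simply be ignored. Its documented tuning knobs are speech-probability thresholds; a hedged sketch, assuming the page's global vad bundle:

// Sketch: VAD tuning via the thresholds @ricky0123/vad-web documents.
// The threshold values are illustrative, not taken from the commit.
const myvad = await vad.MicVAD.new({
  positiveSpeechThreshold: 0.8,    // raise for stricter speech-start detection
  negativeSpeechThreshold: 0.6,    // hysteresis before declaring speech end
  onSpeechStart: () => addLog('--- Voice activity: speech start'),
  onSpeechEnd: (audio) => processSpeech(audio),  // audio: Float32Array, 16 kHz
});
await myvad.start();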
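
One loose end the diff leaves open: updateVisualizer() now reschedules itself with setTimeout, but the stop path still calls cancelAnimationFrame(animationId). A timeout handle is not a requestAnimationFrame handle, so the visualizer loop would likely keep running after the call ends. The matching cancel is:

// The cancel call must match how the loop was scheduled.
clearTimeout(animationId);  // pairs with: animationId = setTimeout(updateVisualizer, 50)
// cancelAnimationFrame(animationId) only cancels requestAnimationFrame handles.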