atlury committed
Commit bea6e1a · verified · 1 Parent(s): 536e020

Update index.html

Files changed (1):
  index.html +148 -74

index.html CHANGED
@@ -3,27 +3,143 @@
  <head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>Voice Chat Bot with Advanced Echo Cancellation and TinyLLM</title>
+ <title>Voice Chat Bot with Advanced Echo Cancellation</title>
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
+

  <style>
- /* ... (previous styles remain unchanged) ... */
- #model-progress {
+ body {
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+ margin: 0;
+ padding: 20px;
+ background-color: #1a1a1a;
+ color: #f0f0f0;
+ }
+ .container {
+ max-width: 800px;
+ margin: 0 auto;
+ }
+ h1 {
+ color: #ffd700;
+ text-align: center;
+ margin-bottom: 10px;
+ }
+ .subtitle {
+ text-align: center;
+ color: #ffd700;
+ margin-bottom: 20px;
+ }
+ #chat-container {
+ display: flex;
+ flex-direction: column;
+ height: 70vh;
+ }
+ #conversation {
+ flex-grow: 1;
+ border: 1px solid #444;
+ padding: 10px;
+ overflow-y: scroll;
+ background-color: #2a2a2a;
+ border-radius: 5px;
+ margin-bottom: 20px;
+ }
+ #controls {
+ display: flex;
+ justify-content: center;
+ margin-bottom: 20px;
+ }
+ button {
+ font-size: 18px;
+ padding: 10px 20px;
+ background-color: #ffd700;
+ color: #1a1a1a;
+ border: none;
+ border-radius: 5px;
+ cursor: pointer;
+ transition: background-color 0.3s;
+ }
+ button:hover {
+ background-color: #ffec8b;
+ }
+ button:disabled {
+ background-color: #666;
+ cursor: not-allowed;
+ }
+ #visualizer {
  width: 100%;
- background-color: #444;
+ height: 100px;
+ background-color: #2a2a2a;
  border-radius: 5px;
- margin-top: 10px;
  overflow: hidden;
+ margin-bottom: 20px;
  }
- #model-progress-bar {
- width: 0;
- height: 20px;
+ .bar {
+ width: 5px;
+ height: 100%;
  background-color: #ffd700;
- text-align: center;
- line-height: 20px;
- color: #1a1a1a;
+ display: inline-block;
+ margin-right: 1px;
+ }
+ #loading {
+ position: fixed;
+ top: 0;
+ left: 0;
+ width: 100%;
+ height: 100%;
+ background-color: rgba(0, 0, 0, 0.8);
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ z-index: 1000;
+ }
+ .spinner {
+ width: 50px;
+ height: 50px;
+ border: 5px solid #f3f3f3;
+ border-top: 5px solid #ffd700;
+ border-radius: 50%;
+ animation: spin 1s linear infinite;
+ }
+ @keyframes spin {
+ 0% { transform: rotate(0deg); }
+ 100% { transform: rotate(360deg); }
+ }
+ #configuration {
+ margin-bottom: 20px;
+ }
+ select {
+ width: 100%;
+ padding: 10px;
+ font-size: 16px;
+ background-color: #2a2a2a;
+ color: #f0f0f0;
+ border: 1px solid #444;
+ border-radius: 5px;
+ }
+ #model-info {
+ margin-top: 10px;
+ font-size: 14px;
+ color: #aaa;
+ }
+ #logs {
+ background-color: #2a2a2a;
+ border: 1px solid #444;
+ border-radius: 5px;
+ padding: 10px;
+ height: 200px;
+ overflow-y: scroll;
+ font-family: monospace;
+ font-size: 14px;
+ }
+ #clear-logs {
+ margin-top: 10px;
+ font-size: 14px;
+ padding: 5px 10px;
+ }
+ #localVideo, #remoteVideo {
+ display: none;
  }
  </style>
  </head>
@@ -45,10 +161,7 @@
  <option value="quality">Highest Quality</option>
  </select>
  <div id="model-info">
- TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
- </div>
- <div id="model-progress">
- <div id="model-progress-bar"></div>
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/gpt2-tiny-english
  </div>
  </div>
  <div id="visualizer"></div>
@@ -65,6 +178,8 @@
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

  env.localModelPath = './models';
+
+ // Configure environment before initializing pipelines
  env.backends = ['wasm'];
  env.wasm = env.wasm || {};
  env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
@@ -79,7 +194,6 @@
  const clearLogsButton = document.getElementById('clear-logs');
  const localVideo = document.getElementById('localVideo');
  const remoteVideo = document.getElementById('remoteVideo');
- const modelProgressBar = document.getElementById('model-progress-bar');

  let myvad;
  let sttPipeline;
@@ -120,38 +234,13 @@
  async function initializePipelines() {
  try {
  addLog('System: Initializing pipelines...');
- const tasks = [
- { name: 'STT', task: 'automatic-speech-recognition', model: 'Xenova/whisper-tiny.en' },
- { name: 'TTS', task: 'text-to-speech', model: 'Xenova/mms-tts-eng' },
- { name: 'LLM', task: 'text-generation', model: 'Xenova/tiny-llm' }
- ];
-
- for (const [index, task] of tasks.entries()) {
- addLog(`System: Loading ${task.name} model...`);
- updateProgressBar((index / tasks.length) * 100);
- const pipelineInstance = await pipeline(task.task, task.model, {
- quantized: true,
- progress_callback: (progress) => {
- updateProgressBar(((index + progress) / tasks.length) * 100);
- }
- });
- addLog(`System: ${task.name} model loaded successfully.`);
-
- switch (task.name) {
- case 'STT':
- sttPipeline = pipelineInstance;
- break;
- case 'TTS':
- ttsPipeline = pipelineInstance;
- break;
- case 'LLM':
- llmPipeline = pipelineInstance;
- break;
- }
- }
-
- updateProgressBar(100);
- addLog('System: All pipelines initialized successfully.');
+ [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
+ pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
+ pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
+ pipeline('text-generation', 'Xenova/gpt2-tiny-english', { quantized: true })
+ ]);
+
+ addLog('System: Digital Human Voice Chat initialized with GPT-2 Tiny. Click "Begin Call" to start.');
  startButton.disabled = false;
  loadingDiv.style.display = 'none';
  } catch (error) {
@@ -161,22 +250,15 @@
  }
  }

- function updateProgressBar(percentage) {
- modelProgressBar.style.width = `${percentage}%`;
- modelProgressBar.textContent = `${Math.round(percentage)}%`;
- }
-
  async function processSpeech(audio) {
  try {
  if (!sttPipeline || !ttsPipeline || !llmPipeline) {
  throw new Error('Pipelines not initialized');
  }

- addLog('System: Processing speech...');
  const transcription = await sttPipeline(audio);
  addLog(`User: ${transcription.text}`);

- addLog('System: Generating LLM response...');
  const llmResponse = await llmPipeline(transcription.text, {
  max_new_tokens: 50,
  temperature: 0.7
184
  const botResponse = llmResponse[0].generated_text;
185
  addLog(`Bot: ${botResponse}`);
186
 
187
- addLog('System: Generating speech from response...');
188
  isSpeaking = true;
189
  const speechOutput = await ttsPipeline(botResponse);
190
  await playAudio(speechOutput.audio);
191
  isSpeaking = false;
192
- addLog('System: Speech playback complete.');
193
  } catch (error) {
194
  console.error('Error processing speech:', error);
195
  addLog(`System: Error processing speech: ${error.message}`);
@@ -204,7 +284,6 @@
  messageElement.textContent = logMessage;
  logsDiv.appendChild(messageElement);
  logsDiv.scrollTop = logsDiv.scrollHeight;
- console.log(logMessage);
  }

  function playAudio(audioArray) {
@@ -243,7 +322,6 @@

  async function startListening() {
  try {
- addLog('System: Initializing audio context and stream...');
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
  analyser = audioContext.createAnalyser();
  analyser.fftSize = 128;
@@ -251,10 +329,12 @@

  localVideo.volume = 0;
  localVideo.muted = true;
+ document.getElementById('localVideo').volume = 0;
+
  remoteVideo.volume = 0;
  remoteVideo.muted = true;
+ document.getElementById('remoteVideo').volume = 0;

- addLog('System: Requesting media stream...');
  microphoneStream = await navigator.mediaDevices.getUserMedia({
  audio: true,
  video: { width: 1, height: 1 }
@@ -263,7 +343,9 @@
  localVideo.srcObject = microphoneStream;
  await localVideo.play();

- addLog('System: Setting up RTCPeerConnection for echo cancellation...');
+ console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
+ console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
+
  const offerOptions = {
  offerToReceiveAudio: true,
  offerToReceiveVideo: false,
@@ -289,21 +371,20 @@
  const source = audioContext.createMediaStreamSource(loopbackStream);
  source.connect(analyser);

- addLog('System: Initializing voice activity detection...');
  myvad = await vad.MicVAD.new({
  noiseSuppression: true,
  aggressiveness: 3,
  onSpeechStart: () => {
- addLog('System: Voice activity detected - speech start');
+ addLog('--- Voice activity: speech start');
  updateVisualizer();
  if (isSpeaking) {
- addLog('System: User interrupted. Stopping bot speech.');
+ addLog('User interrupted. Stopping bot speech.');
  stopCurrentAudio();
  isSpeaking = false;
  }
  },
  onSpeechEnd: (audio) => {
- addLog('System: Voice activity detected - speech end');
+ addLog('--- Voice activity: speech end');
  cancelAnimationFrame(animationId);
  processSpeech(audio);
  }
@@ -312,7 +393,7 @@
  await myvad.start();
  startButton.textContent = 'End Call';
  isListening = true;
- addLog('System: Listening started successfully.');
+ addLog('System: Listening...');
  } catch (error) {
  console.error('Error starting voice activity:', error);
  addLog(`System: Error starting voice detection: ${error.message}`);
@@ -320,26 +401,21 @@
  }

  async function stopListening() {
- addLog('System: Stopping listening...');
  if (myvad) {
  try {
  await myvad.destroy();
- addLog('System: Voice activity detection stopped.');
  } catch (error) {
  console.error('Error stopping voice activity:', error);
- addLog(`System: Error stopping voice activity: ${error.message}`);
  }
  myvad = null;
  }
  if (microphoneStream) {
  microphoneStream.getTracks().forEach(track => track.stop());
  microphoneStream = null;
- addLog('System: Microphone stream stopped.');
  }
  if (audioContext) {
  await audioContext.close();
  audioContext = null;
- addLog('System: Audio context closed.');
  }
  if (localVideo) {
  localVideo.srcObject = null;
@@ -350,12 +426,10 @@
  if (rtcConnection) {
  rtcConnection.close();
  rtcConnection = null;
- addLog('System: RTCPeerConnection closed.');
  }
  if (rtcLoopbackConnection) {
  rtcLoopbackConnection.close();
  rtcLoopbackConnection = null;
- addLog('System: RTCPeerConnection loopback closed.');
  }
  loopbackStream = new MediaStream();
  stopCurrentAudio();
@@ -363,12 +437,12 @@
  isListening = false;
  addLog('System: Stopped listening.');
  cancelAnimationFrame(animationId);
+ addLog('System: Microphone closed');
  }

  startButton.addEventListener('click', toggleListening);
  clearLogsButton.addEventListener('click', () => {
  logsDiv.innerHTML = '';
- addLog('System: Logs cleared.');
  });

  createVisualizer();