atlury committed on
Commit
b15f4c6
·
verified ·
1 Parent(s): 08925c7

Create index.backup6.html

Browse files
Files changed (1) hide show
  1. index.backup6.html +459 -0
index.backup6.html ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Voice Chat Bot with Advanced Echo Cancellation</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
10
+
11
+
12
+ <style>
13
+ body {
14
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
15
+ margin: 0;
16
+ padding: 20px;
17
+ background-color: #1a1a1a;
18
+ color: #f0f0f0;
19
+ }
20
+ .container {
21
+ max-width: 800px;
22
+ margin: 0 auto;
23
+ }
24
+ h1 {
25
+ color: #ffd700;
26
+ text-align: center;
27
+ margin-bottom: 10px;
28
+ }
29
+ .subtitle {
30
+ text-align: center;
31
+ color: #ffd700;
32
+ margin-bottom: 20px;
33
+ }
34
+ #chat-container {
35
+ display: flex;
36
+ flex-direction: column;
37
+ height: 70vh;
38
+ }
39
+ #conversation {
40
+ flex-grow: 1;
41
+ border: 1px solid #444;
42
+ padding: 10px;
43
+ overflow-y: scroll;
44
+ background-color: #2a2a2a;
45
+ border-radius: 5px;
46
+ margin-bottom: 20px;
47
+ }
48
+ #controls {
49
+ display: flex;
50
+ justify-content: center;
51
+ margin-bottom: 20px;
52
+ }
53
+ button {
54
+ font-size: 18px;
55
+ padding: 10px 20px;
56
+ background-color: #ffd700;
57
+ color: #1a1a1a;
58
+ border: none;
59
+ border-radius: 5px;
60
+ cursor: pointer;
61
+ transition: background-color 0.3s;
62
+ }
63
+ button:hover {
64
+ background-color: #ffec8b;
65
+ }
66
+ button:disabled {
67
+ background-color: #666;
68
+ cursor: not-allowed;
69
+ }
70
+ #visualizer {
71
+ width: 100%;
72
+ height: 100px;
73
+ background-color: #2a2a2a;
74
+ border-radius: 5px;
75
+ overflow: hidden;
76
+ margin-bottom: 20px;
77
+ }
78
+ .bar {
79
+ width: 5px;
80
+ height: 100%;
81
+ background-color: #ffd700;
82
+ display: inline-block;
83
+ margin-right: 1px;
84
+ }
85
+ #loading {
86
+ position: fixed;
87
+ top: 0;
88
+ left: 0;
89
+ width: 100%;
90
+ height: 100%;
91
+ background-color: rgba(0, 0, 0, 0.8);
92
+ display: flex;
93
+ justify-content: center;
94
+ align-items: center;
95
+ z-index: 1000;
96
+ }
97
+ .spinner {
98
+ width: 50px;
99
+ height: 50px;
100
+ border: 5px solid #f3f3f3;
101
+ border-top: 5px solid #ffd700;
102
+ border-radius: 50%;
103
+ animation: spin 1s linear infinite;
104
+ }
105
+ @keyframes spin {
106
+ 0% { transform: rotate(0deg); }
107
+ 100% { transform: rotate(360deg); }
108
+ }
109
+ #configuration {
110
+ margin-bottom: 20px;
111
+ }
112
+ select {
113
+ width: 100%;
114
+ padding: 10px;
115
+ font-size: 16px;
116
+ background-color: #2a2a2a;
117
+ color: #f0f0f0;
118
+ border: 1px solid #444;
119
+ border-radius: 5px;
120
+ }
121
+ #model-info {
122
+ margin-top: 10px;
123
+ font-size: 14px;
124
+ color: #aaa;
125
+ }
126
+ #logs {
127
+ background-color: #2a2a2a;
128
+ border: 1px solid #444;
129
+ border-radius: 5px;
130
+ padding: 10px;
131
+ height: 200px;
132
+ overflow-y: scroll;
133
+ font-family: monospace;
134
+ font-size: 14px;
135
+ }
136
+ #clear-logs {
137
+ margin-top: 10px;
138
+ font-size: 14px;
139
+ padding: 5px 10px;
140
+ }
141
+ #localVideo, #remoteVideo {
142
+ display: none;
143
+ }
144
+ </style>
145
+ </head>
146
+ <body>
147
+ <div id="loading">
148
+ <div class="spinner"></div>
149
+ </div>
150
+ <div class="container">
151
+ <h1>Digital Human Voice Chat</h1>
152
+ <p class="subtitle">For best results, use headphones.</p>
153
+ <div id="chat-container">
154
+ <div id="controls">
155
+ <button id="startButton" disabled>Begin Call</button>
156
+ </div>
157
+ <div id="configuration">
158
+ <select id="configSelect">
159
+ <option value="fastest">Fastest</option>
160
+ <option value="balanced">Balanced</option>
161
+ <option value="quality">Highest Quality</option>
162
+ </select>
163
+ <div id="model-info">
164
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
165
+ </div>
166
+ </div>
167
+ <div id="visualizer"></div>
168
+ <div id="conversation"></div>
169
+ </div>
170
+ <h2>Logs</h2>
171
+ <div id="logs"></div>
172
+ <button id="clear-logs">Clear</button>
173
+ </div>
174
+ <video id="localVideo" autoplay></video>
175
+ <video id="remoteVideo" autoplay></video>
176
+
177
+ <script type="module">
178
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
179
+
180
+ env.localModelPath = './models';
181
+
182
+ //BELOW 5 statements added by RAHUL
183
+ // Configure environment before initializing pipelines
184
+ env.backends = ['wasm'];
185
+ env.wasm = env.wasm || {};
186
+ env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/'; // Ensure correct WASM paths
187
+ env.wasm.simd = true; // Enable SIMD if available
188
+ env.numThreads = navigator.hardwareConcurrency || 4; // Use available CPU cores
189
+
190
// --- DOM references ---
const conversationDiv = document.getElementById('conversation');
const startButton = document.getElementById('startButton');
const visualizer = document.getElementById('visualizer');
const loadingDiv = document.getElementById('loading');
const logsDiv = document.getElementById('logs');
const clearLogsButton = document.getElementById('clear-logs');
const localVideo = document.getElementById('localVideo');
const remoteVideo = document.getElementById('remoteVideo');

// --- Model pipelines (assigned by initializePipelines) ---
let sttPipeline;
let ttsPipeline;

// --- Audio graph / visualizer state ---
let audioContext;
let analyser;
let dataArray;
let bars;
let animationId; // setTimeout id driving the updateVisualizer loop

// --- Session state ---
let myvad;
let isListening = false;
let isSpeaking = false;
let microphoneStream;
let currentAudioSource = null;

// --- WebRTC loopback pair (echo-cancellation hack) ---
let rtcConnection = null;
let rtcLoopbackConnection = null;
let loopbackStream = new MediaStream();
214
+
215
// Build the audio visualizer: fill the container with fixed-width bars
// and cache the live HTMLCollection for updateVisualizer() to mutate.
function createVisualizer() {
    const BAR_COUNT = 64;
    const fragment = document.createDocumentFragment();
    let remaining = BAR_COUNT;
    while (remaining-- > 0) {
        const bar = document.createElement('div');
        bar.className = 'bar';
        fragment.appendChild(bar);
    }
    visualizer.appendChild(fragment);
    bars = visualizer.getElementsByClassName('bar');
}
224
+
225
// Refresh each bar's height from the analyser's current frequency
// data, then re-arm itself on a 50 ms timer (deliberately slower than
// requestAnimationFrame to reduce CPU use - RAHUL ATLURY).
function updateVisualizer() {
    analyser.getByteFrequencyData(dataArray);
    let i = 0;
    for (const bar of bars) {
        bar.style.height = (dataArray[i++] / 2) + 'px';
    }
    // The timer id is kept in animationId so the loop can be cancelled.
    animationId = setTimeout(updateVisualizer, 50);
}
236
+
237
+
238
// Load the speech-to-text and text-to-speech models in parallel, then
// unlock the "Begin Call" button. On failure, report to the log panel.
// The loading spinner is hidden on both paths.
async function initializePipelines() {
    try {
        const loaders = [
            pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
            pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
        ];
        [sttPipeline, ttsPipeline] = await Promise.all(loaders);

        addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
        startButton.disabled = false;
    } catch (error) {
        console.error('Error initializing pipelines:', error);
        addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
    } finally {
        loadingDiv.style.display = 'none';
    }
}
260
+
261
// Transcribe one VAD-captured utterance, build the bot reply, and play
// it back through the shared AudioContext. `audio` is the sample array
// delivered by MicVAD's onSpeechEnd callback.
// Fix: isSpeaking is now reset in a finally block, so an error thrown
// by the TTS pipeline or playback can no longer leave the interrupt
// flag stuck at true.
async function processSpeech(audio) {
    try {
        if (!sttPipeline || !ttsPipeline) {
            throw new Error('Pipelines not initialized');
        }

        const transcription = await sttPipeline(audio);
        addLog(`User: ${transcription.text}`);

        // Placeholder "LLM": echo the transcription back to the user.
        const botResponse = `I heard you say: "${transcription.text}".`;
        addLog(`Bot: ${botResponse}`);

        isSpeaking = true;
        try {
            const speechOutput = await ttsPipeline(botResponse);
            await playAudio(speechOutput.audio);
        } finally {
            isSpeaking = false;
        }
    } catch (error) {
        console.error('Error processing speech:', error);
        addLog('System: Error processing speech. Please try again.');
    }
}
282
+
283
// Append a timestamped line to the on-page log panel and keep it
// scrolled to the newest entry. Uses textContent, so `message` is
// rendered verbatim (no HTML injection).
function addLog(message) {
    const entry = document.createElement('div');
    entry.textContent = `[${new Date().toLocaleTimeString()}] ${message}`;
    logsDiv.appendChild(entry);
    logsDiv.scrollTop = logsDiv.scrollHeight;
}
292
+
293
// Play a mono Float32Array of PCM samples through the shared
// AudioContext, routed via the analyser so the visualizer reacts to
// bot speech. Resolves when playback ends, including when it is cut
// short by stopCurrentAudio() (stop() also fires onended).
// Generalized: the sample rate is now a parameter defaulting to
// 16000 Hz (the rate of the Xenova/mms-tts-eng output), so existing
// callers are unaffected while other sources become playable.
function playAudio(audioArray, sampleRate = 16000) {
    return new Promise((resolve) => {
        const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
        audioBuffer.getChannelData(0).set(audioArray);

        const source = audioContext.createBufferSource();
        currentAudioSource = source;
        source.buffer = audioBuffer;
        source.connect(analyser);
        analyser.connect(audioContext.destination);
        // Register onended before start() so an immediate stop still resolves.
        source.onended = () => {
            currentAudioSource = null;
            resolve();
        };
        source.start();
    });
}
311
+
312
// Abort bot playback, if any is in flight. Calling stop() on the
// source node fires its onended handler, which lets the pending
// playAudio() promise resolve.
function stopCurrentAudio() {
    if (!currentAudioSource) {
        return;
    }
    currentAudioSource.stop();
    currentAudioSource = null;
}
318
+
319
// Single entry point for the Begin Call / End Call button.
async function toggleListening() {
    const action = isListening ? stopListening : startListening;
    await action();
}
326
+
327
// Begin a listening session: build the analyser, grab the microphone
// (plus a 1x1 video track — presumably to coax the browser into
// stronger echo cancellation; confirm against target browsers), route
// the audio through a local WebRTC loopback pair for additional echo
// processing, and start voice-activity detection.
// Fix: onSpeechEnd now cancels the visualizer loop with clearTimeout —
// updateVisualizer schedules itself via setTimeout, so the previous
// cancelAnimationFrame(animationId) call silently did nothing and the
// loop kept running.
async function startListening() {
    try {
        audioContext = new (window.AudioContext || window.webkitAudioContext)();
        analyser = audioContext.createAnalyser();
        analyser.fftSize = 128; // 64 frequency bins — one per visualizer bar
        dataArray = new Uint8Array(analyser.frequencyBinCount);

        // Mute both hidden video elements so the loopback never becomes audible.
        localVideo.volume = 0;
        localVideo.muted = true;
        document.getElementById('localVideo').volume = 0;

        remoteVideo.volume = 0;
        remoteVideo.muted = true;
        document.getElementById('remoteVideo').volume = 0;

        // Request both audio and video streams
        microphoneStream = await navigator.mediaDevices.getUserMedia({
            audio: true,
            video: { width: 1, height: 1 } // Minimal video for echo cancellation
        });

        localVideo.srcObject = microphoneStream;
        await localVideo.play();

        console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
        console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());

        // Loopback hack: feed the mic through a local RTCPeerConnection
        // pair so the browser's AEC pipeline processes it.
        const offerOptions = {
            offerToReceiveAudio: true,
            offerToReceiveVideo: false,
        };

        rtcConnection = new RTCPeerConnection();
        rtcLoopbackConnection = new RTCPeerConnection();

        rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
        rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));

        rtcLoopbackConnection.ontrack = e => e.streams[0].getTracks().forEach(track => loopbackStream.addTrack(track));

        microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));

        const offer = await rtcConnection.createOffer(offerOptions);
        await rtcConnection.setLocalDescription(offer);
        await rtcLoopbackConnection.setRemoteDescription(offer);
        const answer = await rtcLoopbackConnection.createAnswer();
        await rtcLoopbackConnection.setLocalDescription(answer);
        await rtcConnection.setRemoteDescription(answer);

        // Use the loopback stream (not the raw mic) for audio processing.
        const source = audioContext.createMediaStreamSource(loopbackStream);
        source.connect(analyser);

        myvad = await vad.MicVAD.new({
            noiseSuppression: true,
            aggressiveness: 3, // Higher value = more aggressive speech detection
            onSpeechStart: () => {
                addLog('--- Voice activity: speech start');
                updateVisualizer();
                // Barge-in: user speech cuts off any in-flight bot audio.
                if (isSpeaking) {
                    addLog('User interrupted. Stopping bot speech.');
                    stopCurrentAudio();
                    isSpeaking = false;
                }
            },
            onSpeechEnd: (audio) => {
                addLog('--- Voice activity: speech end');
                // animationId is a setTimeout id — clearTimeout, not
                // cancelAnimationFrame, is what actually stops the loop.
                clearTimeout(animationId);
                processSpeech(audio);
            }
        });

        await myvad.start();
        startButton.textContent = 'End Call';
        isListening = true;
        addLog('System: Listening...');
    } catch (error) {
        console.error('Error starting voice activity:', error);
        addLog('System: Error starting voice detection. Please check your microphone and try again.');
    }
}
409
+
410
// Tear down the listening session: VAD, microphone tracks, the
// AudioContext, the hidden video elements, the WebRTC loopback pair,
// and any in-flight bot playback. Safe to call when partially set up —
// every resource is guarded and nulled after release.
// Fix: the visualizer loop is driven by setTimeout (see
// updateVisualizer), so clearTimeout is the call that actually stops
// it; the previous cancelAnimationFrame(animationId) was a no-op on a
// timeout id and left the loop running after the call ended.
async function stopListening() {
    if (myvad) {
        try {
            await myvad.destroy();
        } catch (error) {
            console.error('Error stopping voice activity:', error);
        }
        myvad = null;
    }
    if (microphoneStream) {
        microphoneStream.getTracks().forEach(track => track.stop());
        microphoneStream = null;
    }
    if (audioContext) {
        await audioContext.close();
        audioContext = null;
    }
    if (localVideo) {
        localVideo.srcObject = null;
    }
    if (remoteVideo) {
        remoteVideo.srcObject = null;
    }
    if (rtcConnection) {
        rtcConnection.close();
        rtcConnection = null;
    }
    if (rtcLoopbackConnection) {
        rtcLoopbackConnection.close();
        rtcLoopbackConnection = null;
    }
    // Drop the old loopback tracks; a fresh stream is built on the next call.
    loopbackStream = new MediaStream();
    stopCurrentAudio();
    startButton.textContent = 'Begin Call';
    isListening = false;
    addLog('System: Stopped listening.');
    clearTimeout(animationId); // stop the setTimeout-driven visualizer loop
    addLog('System: Microphone closed');
}
449
+
450
// Wire up the UI controls and kick off model loading.
const clearLogs = () => {
    logsDiv.innerHTML = '';
};

startButton.addEventListener('click', toggleListening);
clearLogsButton.addEventListener('click', clearLogs);

createVisualizer();
initializePipelines();
457
+ </script>
458
+ </body>
459
+ </html>