atlury committed on
Commit
2a7725b
·
verified ·
1 Parent(s): c7ff723

Create index.html

Browse files
Files changed (1) hide show
  1. index.html +396 -0
index.html ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Voice Chat Bot</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web/dist/bundle.min.js"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/@xenova/transformers"></script>
10
+ <style>
11
+ body {
12
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
13
+ margin: 0;
14
+ padding: 20px;
15
+ background-color: #1a1a1a;
16
+ color: #f0f0f0;
17
+ }
18
+ .container {
19
+ max-width: 800px;
20
+ margin: 0 auto;
21
+ }
22
+ h1 {
23
+ color: #ffd700;
24
+ text-align: center;
25
+ margin-bottom: 10px;
26
+ }
27
+ .subtitle {
28
+ text-align: center;
29
+ color: #ffd700;
30
+ margin-bottom: 20px;
31
+ }
32
+ #chat-container {
33
+ display: flex;
34
+ flex-direction: column;
35
+ height: 70vh;
36
+ }
37
+ #conversation {
38
+ flex-grow: 1;
39
+ border: 1px solid #444;
40
+ padding: 10px;
41
+ overflow-y: scroll;
42
+ background-color: #2a2a2a;
43
+ border-radius: 5px;
44
+ margin-bottom: 20px;
45
+ }
46
+ #controls {
47
+ display: flex;
48
+ justify-content: center;
49
+ margin-bottom: 20px;
50
+ }
51
+ button {
52
+ font-size: 18px;
53
+ padding: 10px 20px;
54
+ background-color: #ffd700;
55
+ color: #1a1a1a;
56
+ border: none;
57
+ border-radius: 5px;
58
+ cursor: pointer;
59
+ transition: background-color 0.3s;
60
+ }
61
+ button:hover {
62
+ background-color: #ffec8b;
63
+ }
64
+ button:disabled {
65
+ background-color: #666;
66
+ cursor: not-allowed;
67
+ }
68
+ #visualizer {
69
+ width: 100%;
70
+ height: 100px;
71
+ background-color: #2a2a2a;
72
+ border-radius: 5px;
73
+ overflow: hidden;
74
+ margin-bottom: 20px;
75
+ }
76
+ .bar {
77
+ width: 5px;
78
+ height: 100%;
79
+ background-color: #ffd700;
80
+ display: inline-block;
81
+ margin-right: 1px;
82
+ }
83
+ #loading {
84
+ position: fixed;
85
+ top: 0;
86
+ left: 0;
87
+ width: 100%;
88
+ height: 100%;
89
+ background-color: rgba(0, 0, 0, 0.8);
90
+ display: flex;
91
+ justify-content: center;
92
+ align-items: center;
93
+ z-index: 1000;
94
+ }
95
+ .spinner {
96
+ width: 50px;
97
+ height: 50px;
98
+ border: 5px solid #f3f3f3;
99
+ border-top: 5px solid #ffd700;
100
+ border-radius: 50%;
101
+ animation: spin 1s linear infinite;
102
+ }
103
+ @keyframes spin {
104
+ 0% { transform: rotate(0deg); }
105
+ 100% { transform: rotate(360deg); }
106
+ }
107
+ #configuration {
108
+ margin-bottom: 20px;
109
+ }
110
+ select {
111
+ width: 100%;
112
+ padding: 10px;
113
+ font-size: 16px;
114
+ background-color: #2a2a2a;
115
+ color: #f0f0f0;
116
+ border: 1px solid #444;
117
+ border-radius: 5px;
118
+ }
119
+ #model-info {
120
+ margin-top: 10px;
121
+ font-size: 14px;
122
+ color: #aaa;
123
+ }
124
+ #logs {
125
+ background-color: #2a2a2a;
126
+ border: 1px solid #444;
127
+ border-radius: 5px;
128
+ padding: 10px;
129
+ height: 200px;
130
+ overflow-y: scroll;
131
+ font-family: monospace;
132
+ font-size: 14px;
133
+ }
134
+ #clear-logs {
135
+ margin-top: 10px;
136
+ font-size: 14px;
137
+ padding: 5px 10px;
138
+ }
139
+ </style>
140
+ </head>
141
+ <body>
142
+ <div id="loading">
143
+ <div class="spinner"></div>
144
+ </div>
145
+ <div class="container">
146
+ <h1>Voice Chat Bot Demo</h1>
147
+ <p class="subtitle">For best results, use headphones.</p>
148
+ <div id="chat-container">
149
+ <div id="controls">
150
+ <button id="startButton" disabled>Begin Call</button>
151
+ </div>
152
+ <div id="configuration">
153
+ <select id="configSelect">
154
+ <option value="fastest">Fastest</option>
155
+ <option value="balanced">Balanced</option>
156
+ <option value="quality">Highest Quality</option>
157
+ </select>
158
+ <div id="model-info">
159
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
160
+ </div>
161
+ </div>
162
+ <div id="visualizer"></div>
163
+ <div id="conversation"></div>
164
+ </div>
165
+ <h2>Logs</h2>
166
+ <div id="logs"></div>
167
+ <button id="clear-logs">Clear</button>
168
+ </div>
169
+
170
+ <script type="module">
171
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
172
+
173
// Point the transformers.js environment at ./models for local model files.
env.localModelPath = './models';

// --- DOM handles -----------------------------------------------------------
const startButton = document.getElementById('startButton');
const clearLogsButton = document.getElementById('clear-logs');
const conversationDiv = document.getElementById('conversation');
const visualizer = document.getElementById('visualizer');
const logsDiv = document.getElementById('logs');
const loadingDiv = document.getElementById('loading');

// --- Speech pipelines and voice-activity detector --------------------------
let sttPipeline;
let ttsPipeline;
let myvad;

// --- Web Audio objects (created in startListening) -------------------------
let audioContext;
let analyser;
let microphoneGainNode;
let microphoneStream;
// Buffer source that is currently playing bot speech, or null when idle.
let currentAudioSource = null;

// --- Visualizer state ------------------------------------------------------
let bars;
let dataArray;
let animationId;

// --- Call state ------------------------------------------------------------
let isListening = false;
let isSpeaking = false;
195
+
196
/**
 * Fill the #visualizer element with the bars used as the level meter.
 *
 * Appends 64 `div.bar` elements and stores the collection returned by
 * `getElementsByClassName('bar')` in the module-level `bars` variable.
 */
function createVisualizer() {
  Array.from({ length: 64 }, () => document.createElement('div')).forEach((barElement) => {
    barElement.className = 'bar';
    visualizer.appendChild(barElement);
  });
  bars = visualizer.getElementsByClassName('bar');
}
205
+
206
/**
 * Draw one frame of the level meter and schedule the next frame.
 *
 * Copies the analyser's byte frequency data into `dataArray` and sets each
 * bar's height to half of the matching bin value, in pixels. Bars without a
 * matching bin are drawn at 0px.
 *
 * The function is both the entry point (called when speech starts) and its own
 * `requestAnimationFrame` callback. Calling it while a loop is already running
 * replaces that loop instead of adding a second one.
 */
function updateVisualizer() {
  // Drop the frame that is still queued (if any). Without this, every extra
  // call would start another loop whose id is overwritten and can never be
  // cancelled. When running as the rAF callback, the id has already fired and
  // this is a no-op.
  cancelAnimationFrame(animationId);

  // Nothing to draw before the audio graph exists or after it is torn down.
  if (!analyser || !dataArray || !bars) {
    return;
  }

  analyser.getByteFrequencyData(dataArray);
  for (let i = 0; i < bars.length; i++) {
    const level = dataArray[i] || 0;
    bars[i].style.height = `${level / 2}px`;
  }
  animationId = requestAnimationFrame(updateVisualizer);
}
214
+
215
/**
 * Load the speech-to-text and text-to-speech pipelines, then unlock the UI.
 *
 * On success the start button is enabled and a ready message is logged. On
 * failure the error goes to the console and a notice goes to the log panel;
 * the start button stays disabled. The loading overlay is hidden either way.
 *
 * @returns {Promise<void>}
 */
async function initializePipelines() {
  const ttsOptions = { quantized: false };
  try {
    sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
    ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', ttsOptions);
    addLog('System: Voice Chat Bot initialized. Click "Begin Call" to start.');
    startButton.disabled = false;
  } catch (error) {
    console.error('Error initializing pipelines:', error);
    addLog('System: Error initializing Voice Chat Bot. Please check the console for details.');
  } finally {
    loadingDiv.style.display = 'none';
  }
}
230
+
231
/**
 * Transcribe one captured utterance, build the echo reply, and speak it.
 *
 * While the reply is synthesized and played, `isSpeaking` is true and the
 * microphone gain is reduced. Both are undone when the reply finishes OR
 * fails, so a TTS/playback error cannot leave the app stuck in the
 * "bot is speaking" state.
 *
 * @param {*} audio - Audio handed over by the VAD `onSpeechEnd` callback and
 *   passed unchanged to the speech-to-text pipeline (presumably a
 *   Float32Array of samples; confirm against the vad-web docs).
 * @returns {Promise<void>} Never rejects; failures are logged.
 */
async function processSpeech(audio) {
  // Set once the speaking state has been entered, so cleanup runs only then.
  let replyStarted = false;

  try {
    if (!sttPipeline || !ttsPipeline) {
      throw new Error('Pipelines not initialized');
    }

    const transcription = await sttPipeline(audio);
    addLog(`User: ${transcription.text}`);

    const botResponse = `I heard you say: "${transcription.text}".`;
    addLog(`Bot: ${botResponse}`);

    isSpeaking = true;
    replyStarted = true;
    reduceMicrophoneSensitivity();
    const speechOutput = await ttsPipeline(botResponse);
    await playAudio(speechOutput.audio);
  } catch (error) {
    console.error('Error processing speech:', error);
    addLog('System: Error processing speech. Please try again.');
  } finally {
    if (replyStarted) {
      isSpeaking = false;
      // Add a small delay before restoring microphone sensitivity.
      setTimeout(() => {
        // The call may have ended during the delay; then there is no audio
        // context left to schedule the gain change on.
        if (audioContext) {
          restoreMicrophoneSensitivity();
        }
      }, 200);
    }
  }
}
256
+
257
/**
 * Append a timestamped line to the log panel and keep it scrolled to the end.
 *
 * @param {string} message - Text to show after the `[time]` prefix.
 */
function addLog(message) {
  const entry = document.createElement('div');
  entry.textContent = `[${new Date().toLocaleTimeString()}] ${message}`;
  logsDiv.appendChild(entry);
  logsDiv.scrollTop = logsDiv.scrollHeight;
}
266
+
267
/**
 * Play mono samples through the speakers and feed them to the analyser.
 *
 * @param {Float32Array} audioArray - Mono samples to play.
 * @param {number} [sampleRate=16000] - Sample rate of `audioArray` in Hz.
 * @returns {Promise<void>} Resolves when the source's `ended` event fires
 *   (natural end or `stop()`), or immediately when there is nothing to play.
 */
function playAudio(audioArray, sampleRate = 16000) {
  return new Promise((resolve) => {
    // createBuffer rejects a zero-length buffer; there is nothing to play.
    if (!audioArray || audioArray.length === 0) {
      resolve();
      return;
    }

    const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
    audioBuffer.getChannelData(0).set(audioArray);

    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;

    // Feed the analyser for the visualizer, but send playback straight to the
    // speakers. The analyser itself must NOT be connected to the destination:
    // startListening also routes the microphone into the analyser, so an
    // analyser -> destination link would play the user's own microphone back
    // through the speakers.
    source.connect(analyser);
    source.connect(audioContext.destination);

    // Register the handler before starting so the end event cannot be missed.
    source.onended = () => {
      source.disconnect();
      // Only clear the shared reference if it still points at this source.
      if (currentAudioSource === source) {
        currentAudioSource = null;
      }
      resolve();
    };

    currentAudioSource = source;
    source.start();
  });
}
285
+
286
/**
 * Stop the bot speech that is currently playing, if any, and forget it.
 */
function stopCurrentAudio() {
  if (!currentAudioSource) {
    return;
  }
  currentAudioSource.stop();
  currentAudioSource = null;
}
292
+
293
/**
 * Lower the microphone gain node to 20% while the bot is speaking.
 *
 * Does nothing when there is no gain node or no audio context. The context is
 * set to null when the call ends while the gain node variable may still be
 * set, so both are checked.
 */
function reduceMicrophoneSensitivity() {
  if (!microphoneGainNode || !audioContext) {
    return;
  }
  // Reduce sensitivity to 20% (0.2) instead of muting completely.
  microphoneGainNode.gain.setValueAtTime(0.2, audioContext.currentTime);
}
299
+
300
/**
 * Set the microphone gain node back to full gain (1).
 *
 * This is also invoked from a delayed timer after the bot finishes speaking,
 * by which time the call may have ended and the audio context been set to
 * null. In that case, or when there is no gain node, it does nothing.
 */
function restoreMicrophoneSensitivity() {
  if (!microphoneGainNode || !audioContext) {
    return;
  }
  microphoneGainNode.gain.setValueAtTime(1, audioContext.currentTime);
}
305
+
306
/**
 * Click handler for the start button: begin the call, or end it if one is
 * already running.
 *
 * Starting and stopping both await several steps, and `isListening` only
 * changes at the end of them. The button is therefore disabled for the
 * duration of the transition so a second click (or a programmatic call)
 * cannot start or stop the call twice.
 *
 * @returns {Promise<void>}
 */
async function toggleListening() {
  // A transition is already in progress.
  if (startButton.disabled) {
    return;
  }

  startButton.disabled = true;
  try {
    if (isListening) {
      await stopListening();
    } else {
      await startListening();
    }
  } finally {
    startButton.disabled = false;
  }
}
313
+
314
/**
 * Release whatever a failed startListening() attempt had already acquired.
 *
 * @returns {Promise<void>}
 */
async function releaseFailedStart() {
  if (myvad) {
    try {
      await myvad.destroy();
    } catch (error) {
      console.error('Error stopping VAD:', error);
    }
    myvad = null;
  }
  if (microphoneStream) {
    microphoneStream.getTracks().forEach((track) => track.stop());
    microphoneStream = null;
  }
  microphoneGainNode = null;
  if (audioContext) {
    const failedContext = audioContext;
    audioContext = null;
    try {
      await failedContext.close();
    } catch (error) {
      console.error('Error closing audio context:', error);
    }
  }
}

/**
 * Start a call: build the audio graph, create the voice-activity detector,
 * open the microphone and begin listening.
 *
 * Graph built here: microphone stream -> gain node -> analyser (64 bins,
 * fftSize 128) for the visualizer.
 *
 * If any step fails, the error is logged and everything acquired so far is
 * released, so a later attempt starts from a clean state.
 *
 * NOTE(review): `MicVAD.new` is not given `microphoneStream`, so it presumably
 * captures its own input. If so, `microphoneGainNode` only affects the
 * visualizer path, not what the VAD hears. Confirm against the vad-web docs.
 *
 * @returns {Promise<void>} Never rejects; failures are logged.
 */
async function startListening() {
  try {
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    analyser = audioContext.createAnalyser();
    analyser.fftSize = 128;
    dataArray = new Uint8Array(analyser.frequencyBinCount);

    microphoneGainNode = audioContext.createGain();

    myvad = await vad.MicVAD.new({
      onSpeechStart: () => {
        addLog('--- vad: speech start');
        updateVisualizer();
        // Barge-in: the user started talking over the bot.
        if (isSpeaking) {
          addLog('User interrupted. Stopping bot speech.');
          stopCurrentAudio();
          isSpeaking = false;
          restoreMicrophoneSensitivity();
        }
      },
      // A detected start that turned out too short to count as speech gets no
      // onSpeechEnd, so the visualizer loop has to be stopped here instead.
      onVADMisfire: () => {
        addLog('--- vad: misfire');
        cancelAnimationFrame(animationId);
      },
      onSpeechEnd: (audio) => {
        addLog('--- vad: speech end');
        cancelAnimationFrame(animationId);
        // processSpeech handles its own errors; no need to await it here.
        processSpeech(audio);
      },
    });

    microphoneStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      },
    });
    const source = audioContext.createMediaStreamSource(microphoneStream);
    source.connect(microphoneGainNode);
    microphoneGainNode.connect(analyser);

    await myvad.start();
    startButton.textContent = 'End Call';
    isListening = true;
    addLog('System: Listening...');
  } catch (error) {
    console.error('Error starting VAD:', error);
    addLog('System: Error starting voice detection. Please check your microphone and try again.');
    // Do not leak the context, detector or microphone of the failed attempt.
    await releaseFailedStart();
  }
}
361
+
362
/**
 * End the call: stop playback and drawing, destroy the voice-activity
 * detector, release the microphone, close the audio context and reset the UI.
 *
 * Each teardown step is isolated, so a failure in one (for example
 * `audioContext.close()` rejecting) is logged and cannot prevent the button
 * and `isListening` from being reset.
 *
 * @returns {Promise<void>} Never rejects; failures are logged.
 */
async function stopListening() {
  // Stop playback and drawing first, while the audio context is still open.
  try {
    stopCurrentAudio();
  } catch (error) {
    console.error('Error stopping audio playback:', error);
  }
  cancelAnimationFrame(animationId);
  isSpeaking = false;

  if (myvad) {
    try {
      await myvad.destroy();
    } catch (error) {
      console.error('Error stopping VAD:', error);
    }
    myvad = null;
  }

  if (microphoneStream) {
    microphoneStream.getTracks().forEach((track) => track.stop());
    microphoneStream = null;
  }

  // The gain node belongs to the context being closed; drop it so delayed
  // gain changes do not act on a dead graph.
  microphoneGainNode = null;

  if (audioContext) {
    const closingContext = audioContext;
    // Clear the shared reference before awaiting so nothing else uses a
    // context that is in the middle of closing.
    audioContext = null;
    try {
      await closingContext.close();
    } catch (error) {
      console.error('Error closing audio context:', error);
    }
  }

  startButton.textContent = 'Begin Call';
  isListening = false;
  addLog('System: Stopped listening.');
  addLog('System: Microphone closed');
}
386
+
387
// Wire up the buttons, build the visualizer bars, and start loading the models.
const clearLogs = () => {
  logsDiv.innerHTML = '';
};

startButton.addEventListener('click', toggleListening);
clearLogsButton.addEventListener('click', clearLogs);

createVisualizer();
initializePipelines();
394
+ </script>
395
+ </body>
396
+ </html>