atlury committed on
Commit
2343fbf
·
verified ·
1 Parent(s): f04deee

Create index.html

Browse files
Files changed (1) hide show
  1. index.html +480 -0
index.html ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Meta and Title -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Digital Human Voice Chat with LLM Integration</title>

<!-- External Scripts: ONNX runtime, voice-activity detection, transformers.js.
     NOTE(review): the "[email protected]" fragments below look like e-mail-obfuscation
     damage from the page scrape; the real CDN package specifiers
     (name@version, e.g. @ricky0123/vad-web@x.y.z) need to be restored. -->
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>

<!-- Styles -->
<style>
    /* Dark theme base */
    body {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        margin: 0;
        padding: 20px;
        background-color: #1a1a1a;
        color: #f0f0f0;
    }
    .container {
        max-width: 800px;
        margin: 0 auto;
    }
    h1 {
        color: #ffd700;
        text-align: center;
        margin-bottom: 10px;
    }
    .subtitle {
        text-align: center;
        color: #ffd700;
        margin-bottom: 20px;
    }
    /* Chat column layout */
    #chat-container {
        display: flex;
        flex-direction: column;
        height: 70vh;
    }
    #conversation {
        flex-grow: 1;
        border: 1px solid #444;
        padding: 10px;
        overflow-y: scroll;
        background-color: #2a2a2a;
        border-radius: 5px;
        margin-bottom: 20px;
    }
    #controls {
        display: flex;
        justify-content: center;
        margin-bottom: 20px;
    }
    button {
        font-size: 18px;
        padding: 10px 20px;
        background-color: #ffd700;
        color: #1a1a1a;
        border: none;
        border-radius: 5px;
        cursor: pointer;
        transition: background-color 0.3s;
    }
    button:hover {
        background-color: #ffec8b;
    }
    button:disabled {
        background-color: #666;
        cursor: not-allowed;
    }
    /* Audio visualizer strip; .bar elements are created by JS */
    #visualizer {
        width: 100%;
        height: 100px;
        background-color: #2a2a2a;
        border-radius: 5px;
        overflow: hidden;
        margin-bottom: 20px;
    }
    .bar {
        width: 5px;
        height: 100%;
        background-color: #ffd700;
        display: inline-block;
        margin-right: 1px;
    }
    /* Full-screen overlay shown while the model pipelines load */
    #loading {
        position: fixed;
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        background-color: rgba(0, 0, 0, 0.8);
        display: flex;
        justify-content: center;
        align-items: center;
        z-index: 1000;
    }
    .spinner {
        width: 50px;
        height: 50px;
        border: 5px solid #f3f3f3;
        border-top: 5px solid #ffd700;
        border-radius: 50%;
        animation: spin 1s linear infinite;
    }
    @keyframes spin {
        0% { transform: rotate(0deg); }
        100% { transform: rotate(360deg); }
    }
    #configuration {
        margin-bottom: 20px;
    }
    select {
        width: 100%;
        padding: 10px;
        font-size: 16px;
        background-color: #2a2a2a;
        color: #f0f0f0;
        border: 1px solid #444;
        border-radius: 5px;
    }
    #model-info {
        margin-top: 10px;
        font-size: 14px;
        color: #aaa;
    }
    /* Log panel */
    #logs {
        background-color: #2a2a2a;
        border: 1px solid #444;
        border-radius: 5px;
        padding: 10px;
        height: 200px;
        overflow-y: scroll;
        font-family: monospace;
        font-size: 14px;
    }
    #clear-logs {
        margin-top: 10px;
        font-size: 14px;
        padding: 5px 10px;
    }
    /* Off-screen media elements used only for the WebRTC echo-cancel loopback */
    #localVideo, #remoteVideo {
        display: none;
    }
</style>
</head>
149
<body>
<!-- Loading Spinner (hidden by JS once pipelines finish loading) -->
<div id="loading">
    <div class="spinner"></div>
</div>

<!-- Main Container -->
<div class="container">
    <h1>Digital Human Voice Chat</h1>
    <p class="subtitle">For best results, use headphones.</p>
    <div id="chat-container">
        <!-- Controls: button is enabled by JS after pipelines load -->
        <div id="controls">
            <button id="startButton" disabled>Begin Call</button>
        </div>

        <!-- Configuration
             NOTE(review): #configSelect is never read by the script below —
             presumably a planned feature; confirm before removing. -->
        <div id="configuration">
            <select id="configSelect">
                <option value="fastest">Fastest</option>
                <option value="balanced">Balanced</option>
                <option value="quality">Highest Quality</option>
            </select>
            <div id="model-info">
                TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: sshleifer/tiny-gpt2
            </div>
        </div>

        <!-- Visualizer and Conversation -->
        <div id="visualizer"></div>
        <div id="conversation"></div>
    </div>

    <!-- Logs -->
    <h2>Logs</h2>
    <div id="logs"></div>
    <button id="clear-logs">Clear</button>
</div>

<!-- Hidden Video Elements (CSS display:none; used only to anchor the
     microphone stream for the echo-cancellation loopback) -->
<video id="localVideo" autoplay></video>
<video id="remoteVideo" autoplay></video>

<!-- JavaScript Code -->
193
<script type="module">
// NOTE(review): the "[email protected]" fragments below look like e-mail-obfuscation
// damage from the page scrape; the real package specifier
// (e.g. @xenova/transformers@<version>) needs to be restored.
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

// Configure environment before initializing pipelines
env.localModelPath = './models';  // only relevant if models are self-hosted — TODO confirm
env.backends = ['wasm'];          // NOTE(review): transformers.js documents env.backends as an object, not an array — verify this assignment has the intended effect
env.wasm = env.wasm || {};
env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
env.wasm.simd = true;
env.numThreads = navigator.hardwareConcurrency || 4;

// DOM Elements
// NOTE(review): conversationDiv is never used below — confirm before removing.
const conversationDiv = document.getElementById('conversation');
const startButton = document.getElementById('startButton');
const visualizer = document.getElementById('visualizer');
const loadingDiv = document.getElementById('loading');
const logsDiv = document.getElementById('logs');
const clearLogsButton = document.getElementById('clear-logs');
const localVideo = document.getElementById('localVideo');
const remoteVideo = document.getElementById('remoteVideo');

// Variables (shared mutable state for one call's lifecycle)
let myvad;                 // MicVAD instance while a call is active
let sttPipeline;           // speech-to-text (Whisper)
let ttsPipeline;           // text-to-speech (MMS-TTS)
let llmPipeline;           // LLM Pipeline
let audioContext;          // Web Audio context created per call
let analyser;              // feeds both the visualizer and playback chain
let dataArray;             // frequency-bin buffer for the visualizer
let bars;                  // live HTMLCollection of visualizer bars
let animationId;           // handle for the visualizer update loop
let isListening = false;   // call currently in progress
let microphoneStream;      // raw getUserMedia stream
let isSpeaking = false;    // bot audio currently playing (barge-in flag)
let currentAudioSource = null;      // active AudioBufferSourceNode, if any
let rtcConnection = null;           // local peer sending mic audio
let rtcLoopbackConnection = null;   // local peer receiving the loopback
let loopbackStream = new MediaStream();  // echo-cancelled audio for VAD
// NOTE(review): declared but never read or updated anywhere in this file —
// presumably intended for multi-turn LLM context; confirm before removing.
let conversationHistory = {
    past_user_inputs: [],
    generated_responses: []
};
235
+
236
+ // Create Visualizer
237
// Build the fixed set of frequency bars inside the visualizer element and
// cache a live collection of them for updateVisualizer().
function createVisualizer() {
    const totalBars = 64;
    Array.from({ length: totalBars }).forEach(() => {
        const bar = document.createElement('div');
        bar.className = 'bar';
        visualizer.appendChild(bar);
    });
    bars = visualizer.getElementsByClassName('bar');
}
246
+
247
+ // Update Visualizer
248
// Drive the audio visualizer: sample the analyser's frequency data and size
// each bar accordingly, then reschedule.
//
// FIX: the original scheduled the next frame with setTimeout(..., 50) but
// every caller cancels via cancelAnimationFrame(animationId), which cannot
// cancel a timeout handle — so the loop never actually stopped. Using
// requestAnimationFrame makes the existing cancellation calls work.
function updateVisualizer() {
    analyser.getByteFrequencyData(dataArray);
    for (let i = 0; i < bars.length; i++) {
        const barHeight = dataArray[i] / 2;
        bars[i].style.height = barHeight + 'px';
    }
    animationId = requestAnimationFrame(updateVisualizer);
}
256
+
257
+ // Initialize Pipelines
258
// Load the STT, TTS and LLM pipelines in parallel, then enable the UI.
// On failure the error is logged and surfaced to the log panel; the loading
// overlay is dismissed either way.
async function initializePipelines() {
    const load = (task, model) => pipeline(task, model, { quantized: true });
    try {
        const loaded = await Promise.all([
            load('automatic-speech-recognition', 'Xenova/whisper-tiny.en'),
            load('text-to-speech', 'Xenova/mms-tts-eng'),
            load('text-generation', 'sshleifer/tiny-gpt2'), // LLM Pipeline
        ]);
        [sttPipeline, ttsPipeline, llmPipeline] = loaded;

        addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
        startButton.disabled = false;
    } catch (error) {
        console.error('Error initializing pipelines:', error);
        addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
    } finally {
        loadingDiv.style.display = 'none';
    }
}
275
+
276
+ // Process Speech
277
// Transcribe a captured speech segment, generate a reply with the LLM, and
// speak the reply through the shared audio graph.
//
// Fixes vs. original:
//  - text-generation pipelines return prompt + completion in generated_text,
//    so the bot used to echo the user's words verbatim; the prompt is now
//    stripped from the reply.
//  - isSpeaking was left stuck at true if TTS or playback threw (breaking
//    barge-in forever); it is now reset in `finally`.
//  - the TTS model's reported sampling rate is forwarded to playAudio
//    instead of relying on its hard-coded default.
async function processSpeech(audio) {
    try {
        if (!sttPipeline || !ttsPipeline || !llmPipeline) {
            throw new Error('Pipelines not initialized');
        }

        const transcription = await sttPipeline(audio);
        addLog(`User: ${transcription.text}`);

        const userText = transcription.text.trim();
        if (!userText) return; // nothing intelligible was captured

        // Generate Bot Response using LLM
        const llmOutput = await llmPipeline(userText, { max_length: 50 });
        let botResponse = llmOutput[0].generated_text;
        if (botResponse.startsWith(userText)) {
            botResponse = botResponse.slice(userText.length).trim();
        }
        if (!botResponse) {
            // Model produced no continuation; fall back to the raw output.
            botResponse = llmOutput[0].generated_text;
        }
        addLog(`Bot: ${botResponse}`);

        isSpeaking = true;
        const speechOutput = await ttsPipeline(botResponse);
        await playAudio(speechOutput.audio, speechOutput.sampling_rate ?? 16000);
    } catch (error) {
        console.error('Error processing speech:', error);
        addLog('System: Error processing speech. Please try again.');
    } finally {
        isSpeaking = false; // never leave the barge-in flag stuck on error
    }
}
300
+
301
+ // Add Log
302
// Append a timestamped entry to the log panel and keep it scrolled to the
// newest line. Uses textContent, so messages are never interpreted as HTML.
function addLog(message) {
    const entry = document.createElement('div');
    const stamp = new Date().toLocaleTimeString();
    entry.textContent = `[${stamp}] ${message}`;
    logsDiv.appendChild(entry);
    logsDiv.scrollTop = logsDiv.scrollHeight;
}
311
+
312
+ // Play Audio
313
// Play a mono Float32Array of PCM samples through the shared
// AudioContext → analyser → destination chain, resolving when playback ends
// (including when stopCurrentAudio() stops it early — stop() fires `ended`).
//
// Generalized: sampleRate was hard-coded to 16000; it is now a parameter
// defaulting to 16000, so callers can pass the TTS model's actual rate
// without breaking existing call sites.
function playAudio(audioArray, sampleRate = 16000) {
    return new Promise((resolve) => {
        const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
        const channelData = audioBuffer.getChannelData(0);
        channelData.set(audioArray);

        const source = audioContext.createBufferSource();
        currentAudioSource = source; // exposed so barge-in can stop playback
        source.buffer = audioBuffer;
        source.connect(analyser);
        analyser.connect(audioContext.destination);
        // Register the handler before starting so a zero-length buffer
        // cannot finish before the callback is attached.
        source.onended = () => {
            currentAudioSource = null;
            resolve();
        };
        source.start();
    });
}
331
+
332
+ // Stop Current Audio
333
// Halt any in-flight bot audio. Stopping the source fires its `ended`
// handler, which resolves the pending playAudio() promise.
function stopCurrentAudio() {
    if (!currentAudioSource) return;
    currentAudioSource.stop();
    currentAudioSource = null;
}
339
+
340
+ // Toggle Listening
341
// Flip between the listening and idle states when the call button is pressed.
async function toggleListening() {
    await (isListening ? stopListening() : startListening());
}
348
+
349
+ // Start Listening
350
// Start Listening
// Acquire the microphone, route it through a local WebRTC loopback (so the
// browser's echo canceller can subtract the bot's own speech from the mic
// signal), then start voice-activity detection on the echo-cancelled stream.
// The offer/answer steps below are strictly order-dependent.
async function startListening() {
    try {
        // Shared audio graph; the analyser feeds the bar visualizer.
        audioContext = new (window.AudioContext || window.webkitAudioContext)();
        analyser = audioContext.createAnalyser();
        analyser.fftSize = 128; // 64 frequency bins — matches the 64 bars
        dataArray = new Uint8Array(analyser.frequencyBinCount);

        // Keep the hidden <video> elements silent.
        localVideo.volume = 0;
        localVideo.muted = true;

        remoteVideo.volume = 0;
        remoteVideo.muted = true;

        // Request Audio and Minimal Video for Echo Cancellation
        microphoneStream = await navigator.mediaDevices.getUserMedia({
            audio: true,
            video: { width: 1, height: 1 }
        });

        localVideo.srcObject = microphoneStream;
        await localVideo.play();

        // Implement Loopback for Echo Cancellation
        const offerOptions = {
            offerToReceiveAudio: true,
            offerToReceiveVideo: false,
        };

        rtcConnection = new RTCPeerConnection();
        rtcLoopbackConnection = new RTCPeerConnection();

        // Exchange ICE candidates directly between the two local peers.
        rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
        rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));

        // Collect the looped-back tracks into the module-level loopbackStream.
        rtcLoopbackConnection.ontrack = e => e.streams[0].getTracks().forEach(track => loopbackStream.addTrack(track));

        microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));

        // Standard SDP offer/answer handshake between the two local peers.
        const offer = await rtcConnection.createOffer(offerOptions);
        await rtcConnection.setLocalDescription(offer);
        await rtcLoopbackConnection.setRemoteDescription(offer);
        const answer = await rtcLoopbackConnection.createAnswer();
        await rtcLoopbackConnection.setLocalDescription(answer);
        await rtcConnection.setRemoteDescription(answer);

        // Use Loopback Stream for Audio Processing
        const source = audioContext.createMediaStreamSource(loopbackStream);
        source.connect(analyser);

        // Voice-activity detection; the `vad` global comes from the VAD
        // bundle loaded in <head>.
        myvad = await vad.MicVAD.new({
            noiseSuppression: true,
            aggressiveness: 3,
            onSpeechStart: () => {
                addLog('--- Voice activity: speech start');
                updateVisualizer();
                // Barge-in: if the user talks over the bot, cut bot playback.
                if (isSpeaking) {
                    addLog('User interrupted. Stopping bot speech.');
                    stopCurrentAudio();
                    isSpeaking = false;
                }
            },
            onSpeechEnd: (audio) => {
                addLog('--- Voice activity: speech end');
                cancelAnimationFrame(animationId);
                // Fire-and-forget: errors are handled inside processSpeech.
                processSpeech(audio);
            }
        });

        await myvad.start();
        startButton.textContent = 'End Call';
        isListening = true;
        addLog('System: Listening...');
    } catch (error) {
        console.error('Error starting voice activity:', error);
        addLog('System: Error starting voice detection. Please check your microphone and try again.');
    }
}
427
+
428
+ // Stop Listening
429
// Stop Listening
// Tear down the call, in order: VAD, microphone tracks, audio context,
// video elements, both RTC peers, then reset UI state for the next call.
async function stopListening() {
    if (myvad) {
        try {
            await myvad.destroy();
        } catch (error) {
            // Best-effort: log and continue tearing everything else down.
            console.error('Error stopping voice activity:', error);
        }
        myvad = null;
    }
    if (microphoneStream) {
        microphoneStream.getTracks().forEach(track => track.stop());
        microphoneStream = null;
    }
    if (audioContext) {
        await audioContext.close();
        audioContext = null;
    }
    if (localVideo) {
        localVideo.srcObject = null;
    }
    if (remoteVideo) {
        remoteVideo.srcObject = null;
    }
    if (rtcConnection) {
        rtcConnection.close();
        rtcConnection = null;
    }
    if (rtcLoopbackConnection) {
        rtcLoopbackConnection.close();
        rtcLoopbackConnection = null;
    }
    // Fresh stream so stale looped-back tracks are not reused on restart.
    loopbackStream = new MediaStream();
    stopCurrentAudio();
    startButton.textContent = 'Begin Call';
    isListening = false;
    addLog('System: Stopped listening.');
    cancelAnimationFrame(animationId);
    addLog('System: Microphone closed');
}
468
+
469
+ // Event Listeners
470
+ startButton.addEventListener('click', toggleListening);
471
+ clearLogsButton.addEventListener('click', () => {
472
+ logsDiv.innerHTML = '';
473
+ });
474
+
475
+ // Initialize
476
+ createVisualizer();
477
+ initializePipelines();
478
+ </script>
479
+ </body>
480
+ </html>