atlury committed
Commit eacac69 · verified · 1 Parent(s): 2343fbf

Update index.html

Files changed (1):
  1. index.html +21 -50
index.html CHANGED
@@ -1,17 +1,14 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
- <!-- Meta and Title -->
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>Digital Human Voice Chat with LLM Integration</title>
-
- <!-- External Scripts -->
+ <title>Voice Chat Bot with Advanced Echo Cancellation and TinyLLM</title>
 <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
 <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
 <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
+

- <!-- Styles -->
 <style>
 body {
 font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
@@ -147,22 +144,16 @@
 </style>
 </head>
 <body>
- <!-- Loading Spinner -->
 <div id="loading">
 <div class="spinner"></div>
 </div>
-
- <!-- Main Container -->
 <div class="container">
 <h1>Digital Human Voice Chat</h1>
 <p class="subtitle">For best results, use headphones.</p>
 <div id="chat-container">
- <!-- Controls -->
 <div id="controls">
 <button id="startButton" disabled>Begin Call</button>
 </div>
-
- <!-- Configuration -->
 <div id="configuration">
 <select id="configSelect">
 <option value="fastest">Fastest</option>
@@ -170,38 +161,31 @@
 <option value="quality">Highest Quality</option>
 </select>
 <div id="model-info">
- TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: sshleifer/tiny-gpt2
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
 </div>
 </div>
-
- <!-- Visualizer and Conversation -->
 <div id="visualizer"></div>
 <div id="conversation"></div>
 </div>
-
- <!-- Logs -->
 <h2>Logs</h2>
 <div id="logs"></div>
 <button id="clear-logs">Clear</button>
 </div>
-
- <!-- Hidden Video Elements -->
 <video id="localVideo" autoplay></video>
 <video id="remoteVideo" autoplay></video>

- <!-- JavaScript Code -->
 <script type="module">
 import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

- // Configure environment before initializing pipelines
 env.localModelPath = './models';
+
+ // Configure environment before initializing pipelines
 env.backends = ['wasm'];
 env.wasm = env.wasm || {};
 env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
 env.wasm.simd = true;
 env.numThreads = navigator.hardwareConcurrency || 4;

- // DOM Elements
 const conversationDiv = document.getElementById('conversation');
 const startButton = document.getElementById('startButton');
 const visualizer = document.getElementById('visualizer');
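A note on the environment block in the hunk above: transformers.js reads the local model directory from env.localModelPath, but in the 2.x releases I am aware of the WASM runtime options live under env.backends.onnx.wasm.* rather than top-level env.wasm.* / env.numThreads, so the latter may simply be ignored. A minimal sketch under that assumption (the exact library version behind the obfuscated CDN URL is not known, and the unpinned dist path below is illustrative):

    import { env } from '@xenova/transformers';

    // Look for models under ./models before falling back to the Hugging Face Hub.
    env.localModelPath = './models';

    // WASM runtime options are proxied through the onnx backend settings in transformers.js 2.x.
    env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/';
    env.backends.onnx.wasm.simd = true;
    env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;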
@@ -211,11 +195,10 @@
 const localVideo = document.getElementById('localVideo');
 const remoteVideo = document.getElementById('remoteVideo');

- // Variables
 let myvad;
 let sttPipeline;
 let ttsPipeline;
- let llmPipeline; // LLM Pipeline
+ let llmPipeline;
 let audioContext;
 let analyser;
 let dataArray;
@@ -228,12 +211,7 @@
 let rtcConnection = null;
 let rtcLoopbackConnection = null;
 let loopbackStream = new MediaStream();
- let conversationHistory = {
- past_user_inputs: [],
- generated_responses: []
- };

- // Create Visualizer
 function createVisualizer() {
 const barCount = 64;
 for (let i = 0; i < barCount; i++) {
@@ -244,7 +222,6 @@
 bars = visualizer.getElementsByClassName('bar');
 }

- // Update Visualizer
 function updateVisualizer() {
 analyser.getByteFrequencyData(dataArray);
 for (let i = 0; i < bars.length; i++) {
@@ -254,16 +231,15 @@
 animationId = setTimeout(updateVisualizer, 50);
 }

- // Initialize Pipelines
 async function initializePipelines() {
 try {
 [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
 pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
 pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
- pipeline('text-generation', 'sshleifer/tiny-gpt2', { quantized: true }) // LLM Pipeline
+ pipeline('text-generation', 'Xenova/tiny-llm')
 ]);
-
- addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
+
+ addLog('System: Digital Human Voice Chat initialized with TinyLLM. Click "Begin Call" to start.');
 startButton.disabled = false;
 loadingDiv.style.display = 'none';
 } catch (error) {
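The text-generation pipeline swapped in above can be smoke-tested on its own before wiring it into the page. A minimal sketch, assuming the package is installed from npm as @xenova/transformers (the page itself uses the CDN import shown earlier); the model id and generation options are the ones from this diff:

    import { pipeline } from '@xenova/transformers';

    // Load the text-generation model this commit switches to and run a single prompt.
    const llm = await pipeline('text-generation', 'Xenova/tiny-llm');
    const out = await llm('Hello, how are you today?', { max_new_tokens: 50, temperature: 0.7 });
    console.log(out[0].generated_text);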
@@ -273,7 +249,6 @@
 }
 }

- // Process Speech
 async function processSpeech(audio) {
 try {
 if (!sttPipeline || !ttsPipeline || !llmPipeline) {
@@ -282,10 +257,12 @@

 const transcription = await sttPipeline(audio);
 addLog(`User: ${transcription.text}`);
-
- // Generate Bot Response using LLM
- const llmOutput = await llmPipeline(transcription.text, { max_length: 50 });
- const botResponse = llmOutput[0].generated_text;
+
+ const llmResponse = await llmPipeline(transcription.text, {
+ max_new_tokens: 50,
+ temperature: 0.7
+ });
+ const botResponse = llmResponse[0].generated_text;
 addLog(`Bot: ${botResponse}`);

 isSpeaking = true;
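One behavior worth noting about the call above: transformers.js text-generation pipelines usually return the prompt followed by the continuation in generated_text, so botResponse may start by repeating the user's words. A small hypothetical helper (not part of this file) that strips the echoed prompt before it is logged and spoken:

    // Hypothetical helper: remove the echoed prompt from the pipeline output, if present.
    function extractReply(prompt, generatedText) {
      return generatedText.startsWith(prompt)
        ? generatedText.slice(prompt.length).trim()
        : generatedText.trim();
    }

    // e.g. const botResponse = extractReply(transcription.text, llmResponse[0].generated_text);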
@@ -298,7 +275,6 @@
 }
 }

- // Add Log
 function addLog(message) {
 const now = new Date();
 const timestamp = now.toLocaleTimeString();
@@ -309,7 +285,6 @@
 logsDiv.scrollTop = logsDiv.scrollHeight;
 }

- // Play Audio
 function playAudio(audioArray) {
 return new Promise((resolve) => {
 const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
@@ -329,7 +304,6 @@
 });
 }

- // Stop Current Audio
 function stopCurrentAudio() {
 if (currentAudioSource) {
 currentAudioSource.stop();
@@ -337,7 +311,6 @@
 }
 }

- // Toggle Listening
 async function toggleListening() {
 if (isListening) {
 await stopListening();
@@ -346,7 +319,6 @@
 }
 }

- // Start Listening
 async function startListening() {
 try {
 audioContext = new (window.AudioContext || window.webkitAudioContext)();
@@ -356,11 +328,12 @@

 localVideo.volume = 0;
 localVideo.muted = true;
+ document.getElementById('localVideo').volume = 0;

 remoteVideo.volume = 0;
 remoteVideo.muted = true;
+ document.getElementById('remoteVideo').volume = 0;

- // Request Audio and Minimal Video for Echo Cancellation
 microphoneStream = await navigator.mediaDevices.getUserMedia({
 audio: true,
 video: { width: 1, height: 1 }
@@ -369,7 +342,9 @@
 localVideo.srcObject = microphoneStream;
 await localVideo.play();

- // Implement Loopback for Echo Cancellation
+ console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
+ console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
+
 const offerOptions = {
 offerToReceiveAudio: true,
 offerToReceiveVideo: false,
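The offer/answer exchange in this and the next hunk implements a local WebRTC loopback: the microphone track is sent over one in-page RTCPeerConnection, received on a second one, and the received stream (loopbackStream) is what feeds the analyser, so the browser's echo canceller processes the audio before Web Audio or the VAD sees it. A minimal sketch of that pattern in isolation (not the exact code in index.html; the names below are illustrative):

    // Route a microphone stream through a pair of local peer connections so that
    // the browser's acoustic echo cancellation applies before Web Audio sees it.
    async function createLoopbackStream(micStream) {
      const send = new RTCPeerConnection();
      const recv = new RTCPeerConnection();
      const loopback = new MediaStream();

      // Both ends live in the same page, so ICE candidates can be handed over directly.
      send.onicecandidate = e => e.candidate && recv.addIceCandidate(e.candidate);
      recv.onicecandidate = e => e.candidate && send.addIceCandidate(e.candidate);
      recv.ontrack = e => loopback.addTrack(e.track);

      micStream.getAudioTracks().forEach(track => send.addTrack(track, micStream));

      const offer = await send.createOffer({ offerToReceiveAudio: true });
      await send.setLocalDescription(offer);
      await recv.setRemoteDescription(offer);

      const answer = await recv.createAnswer();
      await recv.setLocalDescription(answer);
      await send.setRemoteDescription(answer);

      return loopback; // use with audioContext.createMediaStreamSource(loopback)
    }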
@@ -392,7 +367,6 @@
 await rtcLoopbackConnection.setLocalDescription(answer);
 await rtcConnection.setRemoteDescription(answer);

- // Use Loopback Stream for Audio Processing
 const source = audioContext.createMediaStreamSource(loopbackStream);
 source.connect(analyser);

@@ -425,7 +399,6 @@
 }
 }

- // Stop Listening
 async function stopListening() {
 if (myvad) {
 try {
@@ -466,15 +439,13 @@
 addLog('System: Microphone closed');
 }

- // Event Listeners
 startButton.addEventListener('click', toggleListening);
 clearLogsButton.addEventListener('click', () => {
 logsDiv.innerHTML = '';
 });

- // Initialize
 createVisualizer();
 initializePipelines();
 </script>
 </body>
- </html>
+ </html>
 