Update index.html

index.html CHANGED (+21 -50)
@@ -1,17 +1,14 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
-    <!-- Meta and Title -->
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>
-
-    <!-- External Scripts -->
+    <title>Voice Chat Bot with Advanced Echo Cancellation and TinyLLM</title>
     <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
     <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
     <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
+
 
-    <!-- Styles -->
     <style>
         body {
             font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
@@ -147,22 +144,16 @@
     </style>
 </head>
 <body>
-    <!-- Loading Spinner -->
     <div id="loading">
         <div class="spinner"></div>
     </div>
-
-    <!-- Main Container -->
     <div class="container">
         <h1>Digital Human Voice Chat</h1>
         <p class="subtitle">For best results, use headphones.</p>
         <div id="chat-container">
-            <!-- Controls -->
            <div id="controls">
                <button id="startButton" disabled>Begin Call</button>
            </div>
-
-            <!-- Configuration -->
            <div id="configuration">
                <select id="configSelect">
                    <option value="fastest">Fastest</option>
@@ -170,38 +161,31 @@
                    <option value="quality">Highest Quality</option>
                </select>
                <div id="model-info">
-                    TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM:
+                    TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
                </div>
            </div>
-
-            <!-- Visualizer and Conversation -->
            <div id="visualizer"></div>
            <div id="conversation"></div>
        </div>
-
-        <!-- Logs -->
        <h2>Logs</h2>
        <div id="logs"></div>
        <button id="clear-logs">Clear</button>
    </div>
-
-    <!-- Hidden Video Elements -->
    <video id="localVideo" autoplay></video>
    <video id="remoteVideo" autoplay></video>
 
-    <!-- JavaScript Code -->
    <script type="module">
        import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
 
-        // Configure environment before initializing pipelines
        env.localModelPath = './models';
+
+        // Configure environment before initializing pipelines
        env.backends = ['wasm'];
        env.wasm = env.wasm || {};
        env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
        env.wasm.simd = true;
        env.numThreads = navigator.hardwareConcurrency || 4;
 
-        // DOM Elements
        const conversationDiv = document.getElementById('conversation');
        const startButton = document.getElementById('startButton');
        const visualizer = document.getElementById('visualizer');
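A note on this environment block: in @xenova/transformers the documented WASM settings live under `env.backends.onnx.wasm`, so the flat `env.backends = ['wasm']`, `env.wasm.*`, and `env.numThreads` assignments above may be silently ignored. Multi-threaded WASM also requires a cross-origin-isolated page before SharedArrayBuffer is available. A defensive sketch using the documented fields (the crossOriginIsolated check is an addition, not part of this commit):

    // Sketch: use the documented env path; fall back to a single thread
    // when the page is not cross-origin isolated.
    env.backends.onnx.wasm.simd = true;
    env.backends.onnx.wasm.numThreads =
        (typeof crossOriginIsolated !== 'undefined' && crossOriginIsolated)
            ? (navigator.hardwareConcurrency || 4)
            : 1;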
@@ -211,11 +195,10 @@
        const localVideo = document.getElementById('localVideo');
        const remoteVideo = document.getElementById('remoteVideo');
 
-        // Variables
        let myvad;
        let sttPipeline;
        let ttsPipeline;
-        let llmPipeline;
+        let llmPipeline;
        let audioContext;
        let analyser;
        let dataArray;
@@ -228,12 +211,7 @@
        let rtcConnection = null;
        let rtcLoopbackConnection = null;
        let loopbackStream = new MediaStream();
-        let conversationHistory = {
-            past_user_inputs: [],
-            generated_responses: []
-        };
 
-        // Create Visualizer
        function createVisualizer() {
            const barCount = 64;
            for (let i = 0; i < barCount; i++) {
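The loop body itself sits outside the changed lines, but from the `bars = visualizer.getElementsByClassName('bar')` assignment in the next hunk it evidently appends one bar element per bin. A sketch of what the elided lines presumably do (assumed, not shown in the diff):

    // One div.bar per frequency bin, styled by the page's CSS.
    const bar = document.createElement('div');
    bar.className = 'bar';
    visualizer.appendChild(bar);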
@@ -244,7 +222,6 @@
            bars = visualizer.getElementsByClassName('bar');
        }
 
-        // Update Visualizer
        function updateVisualizer() {
            analyser.getByteFrequencyData(dataArray);
            for (let i = 0; i < bars.length; i++) {
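updateVisualizer reads dataArray from an AnalyserNode that is created in unchanged lines. For orientation, a setup consistent with the 64 bars would be (a sketch; the fftSize value is an assumption):

    // fftSize of 128 yields frequencyBinCount = 64, one bin per bar.
    analyser = audioContext.createAnalyser();
    analyser.fftSize = 128;
    dataArray = new Uint8Array(analyser.frequencyBinCount);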
@@ -254,16 +231,15 @@
            animationId = setTimeout(updateVisualizer, 50);
        }
 
-        // Initialize Pipelines
        async function initializePipelines() {
            try {
                [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
                    pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
-                    pipeline('text-generation', '
+                    pipeline('text-generation', 'Xenova/tiny-llm')
                ]);
-
-                addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
+
+                addLog('System: Digital Human Voice Chat initialized with TinyLLM. Click "Begin Call" to start.');
                startButton.disabled = false;
                loadingDiv.style.display = 'none';
            } catch (error) {
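This hunk also fixes the truncated `pipeline('text-generation', '` call that the old file shipped with. Since three model downloads can take a while on first load, the pipeline factory's `progress_callback` option could surface progress instead of a silent spinner. A sketch (the logging is illustrative, not part of this commit):

    // Sketch: report model-download progress while the spinner is shown.
    sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', {
        quantized: true,
        progress_callback: (p) => {
            if (p.status === 'progress') {
                addLog(`System: downloading ${p.file} (${Math.round(p.progress)}%)`);
            }
        }
    });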
@@ -273,7 +249,6 @@
            }
        }
 
-        // Process Speech
        async function processSpeech(audio) {
            try {
                if (!sttPipeline || !ttsPipeline || !llmPipeline) {
@@ -282,10 +257,12 @@
 
                const transcription = await sttPipeline(audio);
                addLog(`User: ${transcription.text}`);
-
-
-
-
+
+                const llmResponse = await llmPipeline(transcription.text, {
+                    max_new_tokens: 50,
+                    temperature: 0.7
+                });
+                const botResponse = llmResponse[0].generated_text;
                addLog(`Bot: ${botResponse}`);
 
                isSpeaking = true;
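One caveat with the new block: for decoder-only text-generation models, `generated_text` normally echoes the prompt, so the bot would read the user's words back before its own reply. A hedged sketch of a strip-and-speak step (the TTS call itself sits in unchanged lines; this mirrors the app's other pipeline calls):

    // Sketch: drop the echoed prompt, then synthesize and play the reply.
    const raw = llmResponse[0].generated_text;
    const botResponse = raw.startsWith(transcription.text)
        ? raw.slice(transcription.text.length).trim()
        : raw.trim();
    const speech = await ttsPipeline(botResponse);
    isSpeaking = true;
    await playAudio(speech.audio);
    isSpeaking = false;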
@@ -298,7 +275,6 @@
            }
        }
 
-        // Add Log
        function addLog(message) {
            const now = new Date();
            const timestamp = now.toLocaleTimeString();
@@ -309,7 +285,6 @@
            logsDiv.scrollTop = logsDiv.scrollHeight;
        }
 
-        // Play Audio
        function playAudio(audioArray) {
            return new Promise((resolve) => {
                const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
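For context, the rest of playAudio (unchanged lines) copies the Float32Array produced by the TTS pipeline into that one-channel 16 kHz buffer and resolves once playback finishes, which is what lets processSpeech await it. A sketch of the whole function under that assumption:

    // Sketch: play a Float32Array at 16 kHz and resolve when it ends.
    function playAudio(audioArray) {
        return new Promise((resolve) => {
            const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
            audioBuffer.getChannelData(0).set(audioArray);
            const source = audioContext.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(audioContext.destination);
            source.onended = resolve;
            currentAudioSource = source;
            source.start();
        });
    }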
@@ -329,7 +304,6 @@
            });
        }
 
-        // Stop Current Audio
        function stopCurrentAudio() {
            if (currentAudioSource) {
                currentAudioSource.stop();
@@ -337,7 +311,6 @@
            }
        }
 
-        // Toggle Listening
        async function toggleListening() {
            if (isListening) {
                await stopListening();
@@ -346,7 +319,6 @@
            }
        }
 
-        // Start Listening
        async function startListening() {
            try {
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
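An AudioContext constructed outside a user gesture starts suspended under autoplay policies; startListening runs from a click, so this normally works, but a defensive resume costs one line:

    // Sketch: make sure the context is actually running before wiring nodes.
    if (audioContext.state === 'suspended') await audioContext.resume();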
@@ -356,11 +328,12 @@
 
                localVideo.volume = 0;
                localVideo.muted = true;
+                document.getElementById('localVideo').volume = 0;
 
                remoteVideo.volume = 0;
                remoteVideo.muted = true;
+                document.getElementById('remoteVideo').volume = 0;
 
-                // Request Audio and Minimal Video for Echo Cancellation
                microphoneStream = await navigator.mediaDevices.getUserMedia({
                    audio: true,
                    video: { width: 1, height: 1 }
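The deleted comment explained the odd 1x1 video request: per the old comment, asking for minimal video alongside audio is part of the echo-cancellation setup. The audio constraints could also be made explicit; these names are standard MediaTrackConstraints, though browsers may ignore them:

    // Sketch: request the browser's echo-cancellation stack explicitly.
    microphoneStream = await navigator.mediaDevices.getUserMedia({
        audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: true },
        video: { width: 1, height: 1 }
    });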
@@ -369,7 +342,9 @@
                localVideo.srcObject = microphoneStream;
                await localVideo.play();
 
-
+                console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
+                console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
+
                const offerOptions = {
                    offerToReceiveAudio: true,
                    offerToReceiveVideo: false,
@@ -392,7 +367,6 @@
                await rtcLoopbackConnection.setLocalDescription(answer);
                await rtcConnection.setRemoteDescription(answer);
 
-                // Use Loopback Stream for Audio Processing
                const source = audioContext.createMediaStreamSource(loopbackStream);
                source.connect(analyser);
 
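This is the heart of the echo-cancellation trick: audio arriving on a WebRTC "remote" stream is what the browser's acoustic echo canceller knows how to subtract from the microphone, so the app loops its own output through a pair of in-page RTCPeerConnections. The offer/answer exchange appears above; the sketch below shows the wiring it implies (the ontrack handler and candidate exchange are assumptions, since those lines are unchanged and elided):

    // Sketch: two local peer connections exchanging ICE candidates directly,
    // with the loopback side's tracks collected into loopbackStream.
    rtcConnection = new RTCPeerConnection();
    rtcLoopbackConnection = new RTCPeerConnection();
    rtcConnection.onicecandidate = (e) =>
        e.candidate && rtcLoopbackConnection.addIceCandidate(e.candidate);
    rtcLoopbackConnection.onicecandidate = (e) =>
        e.candidate && rtcConnection.addIceCandidate(e.candidate);
    rtcLoopbackConnection.ontrack = (e) =>
        e.streams[0].getTracks().forEach((t) => loopbackStream.addTrack(t));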
@@ -425,7 +399,6 @@
            }
        }
 
-        // Stop Listening
        async function stopListening() {
            if (myvad) {
                try {
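The VAD setup itself (old lines 399-424) is untouched by this commit and therefore elided from the diff. For orientation, the usual @ricky0123/vad-web wiring for this flow looks roughly like this (a sketch, not the commit's exact code):

    // Sketch: voice-activity detection hands each finished utterance,
    // as a Float32Array, to processSpeech; barge-in stops current playback.
    myvad = await vad.MicVAD.new({
        onSpeechStart: () => {
            if (isSpeaking) stopCurrentAudio();
        },
        onSpeechEnd: (audio) => processSpeech(audio)
    });
    myvad.start();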
@@ -466,15 +439,13 @@
            addLog('System: Microphone closed');
        }
 
-        // Event Listeners
        startButton.addEventListener('click', toggleListening);
        clearLogsButton.addEventListener('click', () => {
            logsDiv.innerHTML = '';
        });
 
-        // Initialize
        createVisualizer();
        initializePipelines();
    </script>
</body>
-</html>
+</html>