<!--
digitalhuman / index.backup3.html
atlury's picture
Rename index.html to index.backup3.html
645df8b verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Voice Chat Bot</title>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
<style>
/* Dark theme: near-black surfaces, light text, gold (#ffd700) accents. */
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 20px;
background-color: #1a1a1a;
color: #f0f0f0;
}
/* Centered page column. */
.container {
max-width: 800px;
margin: 0 auto;
}
h1 {
color: #ffd700;
text-align: center;
margin-bottom: 10px;
}
.subtitle {
text-align: center;
color: #ffd700;
margin-bottom: 20px;
}
/* Chat area: conversation pane grows to fill the 70vh column. */
#chat-container {
display: flex;
flex-direction: column;
height: 70vh;
}
#conversation {
flex-grow: 1;
border: 1px solid #444;
padding: 10px;
overflow-y: scroll;
background-color: #2a2a2a;
border-radius: 5px;
margin-bottom: 20px;
}
#controls {
display: flex;
justify-content: center;
margin-bottom: 20px;
}
button {
font-size: 18px;
padding: 10px 20px;
background-color: #ffd700;
color: #1a1a1a;
border: none;
border-radius: 5px;
cursor: pointer;
transition: background-color 0.3s;
}
button:hover {
background-color: #ffec8b;
}
button:disabled {
background-color: #666;
cursor: not-allowed;
}
/* Audio-level visualizer: a strip of vertical bars driven by JS. */
#visualizer {
width: 100%;
height: 100px;
background-color: #2a2a2a;
border-radius: 5px;
overflow: hidden;
margin-bottom: 20px;
}
.bar {
width: 5px;
height: 100%;
background-color: #ffd700;
display: inline-block;
margin-right: 1px;
}
/* Full-screen overlay shown while the ML models load. */
#loading {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.8);
display: flex;
justify-content: center;
align-items: center;
z-index: 1000;
}
.spinner {
width: 50px;
height: 50px;
border: 5px solid #f3f3f3;
border-top: 5px solid #ffd700;
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Quality-preset dropdown and model description. */
#configuration {
margin-bottom: 20px;
}
select {
width: 100%;
padding: 10px;
font-size: 16px;
background-color: #2a2a2a;
color: #f0f0f0;
border: 1px solid #444;
border-radius: 5px;
}
#model-info {
margin-top: 10px;
font-size: 14px;
color: #aaa;
}
/* Scrolling monospace log panel appended to by addLog(). */
#logs {
background-color: #2a2a2a;
border: 1px solid #444;
border-radius: 5px;
padding: 10px;
height: 200px;
overflow-y: scroll;
font-family: monospace;
font-size: 14px;
}
#clear-logs {
margin-top: 10px;
font-size: 14px;
padding: 5px 10px;
}
</style>
</head>
<body>
<!-- Overlay with spinner, hidden by the script once pipelines finish loading. -->
<div id="loading">
<div class="spinner"></div>
</div>
<div class="container">
<h1>Voice Chat Bot Demo</h1>
<p class="subtitle">For best results, use headphones.</p>
<div id="chat-container">
<!-- Start/stop button; enabled only after model initialization succeeds. -->
<div id="controls">
<button id="startButton" disabled>Begin Call</button>
</div>
<!-- NOTE(review): this select is not read anywhere in the script yet. -->
<div id="configuration">
<select id="configSelect">
<option value="fastest">Fastest</option>
<option value="balanced">Balanced</option>
<option value="quality">Highest Quality</option>
</select>
<div id="model-info">
TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
</div>
</div>
<!-- Bars are injected here by createVisualizer(). -->
<div id="visualizer"></div>
<!-- NOTE(review): never written to by the script; logs go to #logs below. -->
<div id="conversation"></div>
</div>
<h2>Logs</h2>
<div id="logs"></div>
<button id="clear-logs">Clear</button>
</div>
<script type="module">
// NOTE(review): transformers.js is also loaded via a <script> tag in <head>;
// this ES-module import is the one actually used — consider dropping the duplicate.
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
// Check ./models for local model files (transformers.js env setting).
env.localModelPath = './models';
// Cached references to the DOM elements the script drives.
const conversationDiv = document.getElementById('conversation');
const startButton = document.getElementById('startButton');
const visualizer = document.getElementById('visualizer');
const loadingDiv = document.getElementById('loading');
const logsDiv = document.getElementById('logs');
const clearLogsButton = document.getElementById('clear-logs');
// Mutable app state shared by the handlers below.
let myvad; // vad.MicVAD instance while a call is active
let sttPipeline; // speech-to-text pipeline (whisper-tiny.en)
let ttsPipeline; // text-to-speech pipeline (mms-tts-eng)
let audioContext; // Web Audio context for playback + visualization
let analyser; // AnalyserNode feeding the visualizer bars
let dataArray; // frequency-bin buffer reused every animation frame
let bars; // live HTMLCollection of .bar elements
let animationId; // requestAnimationFrame handle for the visualizer loop
let isListening = false; // true while the mic/VAD are running
let microphoneStream; // MediaStream from getUserMedia
let isSpeaking = false; // true while bot TTS audio is playing
let currentAudioSource = null; // playing AudioBufferSourceNode, for barge-in stop
function createVisualizer() {
// Fill the visualizer strip with 64 bars and cache the live collection
// so updateVisualizer() can resize them every frame.
const totalBars = 64;
for (let index = 0; index < totalBars; index++) {
const barElement = document.createElement('div');
barElement.className = 'bar';
visualizer.appendChild(barElement);
}
bars = visualizer.getElementsByClassName('bar');
}
function updateVisualizer() {
// Grab the latest frequency snapshot, scale each bar to half the bin value
// (bins are 0-255, bars max out at ~127px), then re-arm for the next frame.
analyser.getByteFrequencyData(dataArray);
Array.prototype.forEach.call(bars, (bar, index) => {
bar.style.height = `${dataArray[index] / 2}px`;
});
animationId = requestAnimationFrame(updateVisualizer);
}
async function initializePipelines() {
// Load the STT and TTS models, then enable the UI.
// The two model downloads are independent, so run them in parallel
// (the original awaited them serially, doubling perceived load time).
try {
[sttPipeline, ttsPipeline] = await Promise.all([
pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'),
pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: false }),
]);
addLog('System: Voice Chat Bot initialized. Click "Begin Call" to start.');
startButton.disabled = false;
} catch (error) {
console.error('Error initializing pipelines:', error);
addLog('System: Error initializing Voice Chat Bot. Please check the console for details.');
} finally {
// Hide the loading overlay on both success and failure paths.
loadingDiv.style.display = 'none';
}
}
async function processSpeech(audio) {
// Transcribe a VAD-captured utterance, build the placeholder echo response,
// synthesize it, and play it back.
try {
if (!sttPipeline || !ttsPipeline) {
throw new Error('Pipelines not initialized');
}
const transcription = await sttPipeline(audio);
addLog(`User: ${transcription.text}`);
const botResponse = `I heard you say: "${transcription.text}".`;
addLog(`Bot: ${botResponse}`);
isSpeaking = true;
try {
const speechOutput = await ttsPipeline(botResponse);
await playAudio(speechOutput.audio);
} finally {
// BUG FIX: if TTS or playback threw, the original left isSpeaking stuck
// at true, so every later VAD speech-start tried to cancel phantom audio.
isSpeaking = false;
}
} catch (error) {
console.error('Error processing speech:', error);
addLog('System: Error processing speech. Please try again.');
}
}
function addLog(message) {
// Append a timestamped entry to the log panel and keep it pinned to the bottom.
const entry = document.createElement('div');
entry.textContent = `[${new Date().toLocaleTimeString()}] ${message}`;
logsDiv.appendChild(entry);
logsDiv.scrollTop = logsDiv.scrollHeight;
}
function playAudio(audioArray) {
// Play a mono Float32 PCM buffer (16 kHz sample rate, matching mms-tts-eng
// output) and resolve when playback ends. onended also fires after a manual
// source.stop(), so stopCurrentAudio() still resolves this promise.
return new Promise((resolve) => {
const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
audioBuffer.getChannelData(0).set(audioArray);
const source = audioContext.createBufferSource();
currentAudioSource = source; // remembered so a user barge-in can stop it
source.buffer = audioBuffer;
// BUG FIX: the original did analyser.connect(destination), which also routed
// the microphone (connected to the analyser in startListening) out to the
// speakers — a feedback/echo path. Feed the analyser and the speakers
// directly from the source instead.
source.connect(analyser);
source.connect(audioContext.destination);
// Attach onended before start() so even a zero-length buffer cannot finish
// before the handler exists.
source.onended = () => {
currentAudioSource = null;
resolve();
};
source.start();
});
}
function stopCurrentAudio() {
// Halt any bot speech currently playing; no-op when nothing is playing.
if (!currentAudioSource) return;
currentAudioSource.stop();
currentAudioSource = null;
}
async function toggleListening() {
// Button handler: flip between the listening and idle states.
await (isListening ? stopListening() : startListening());
}
async function startListening() {
// Build the audio graph, create the VAD, open the microphone, and start
// listening. On any failure, releases whatever was partially acquired.
try {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
analyser.fftSize = 128; // 64 frequency bins — one per visualizer bar
dataArray = new Uint8Array(analyser.frequencyBinCount);
myvad = await vad.MicVAD.new({
onSpeechStart: () => {
addLog('--- vad: speech start');
updateVisualizer();
// Barge-in: if the bot is talking, cut it off so the user can interrupt.
if (isSpeaking) {
addLog('User interrupted. Stopping bot speech.');
stopCurrentAudio();
isSpeaking = false;
}
},
onSpeechEnd: (audio) => {
addLog('--- vad: speech end');
cancelAnimationFrame(animationId);
// Fire-and-forget: processSpeech handles its own errors.
processSpeech(audio);
}
});
// Request a processed mic signal to reduce the bot hearing its own output.
microphoneStream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
voiceIsolation: true // non-standard constraint; ignored where unsupported
}
});
console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
const supportedConstraints = navigator.mediaDevices.getSupportedConstraints();
console.log('Supported constraints:', supportedConstraints);
if (!supportedConstraints.echoCancellation) {
console.warn('Echo cancellation is not supported on this device or browser.');
}
// Feed the mic into the analyser so the visualizer reacts to user speech.
const source = audioContext.createMediaStreamSource(microphoneStream);
source.connect(analyser);
await myvad.start();
startButton.textContent = 'End Call';
isListening = true;
addLog('System: Listening...');
} catch (error) {
console.error('Error starting VAD:', error);
addLog('System: Error starting voice detection. Please check your microphone and try again.');
// BUG FIX: the original leaked the AudioContext and mic stream when setup
// failed partway through; release them so a retry starts clean.
if (microphoneStream) {
microphoneStream.getTracks().forEach((track) => track.stop());
microphoneStream = null;
}
if (audioContext) {
await audioContext.close().catch(() => {});
audioContext = null;
}
}
}
async function stopListening() {
// Tear everything down in acquisition order — VAD, mic tracks, audio
// graph — then restore the idle UI state.
if (myvad) {
try {
await myvad.destroy();
} catch (error) {
console.error('Error stopping VAD:', error);
}
myvad = null;
}
microphoneStream?.getTracks().forEach((track) => track.stop());
microphoneStream = null;
if (audioContext) {
await audioContext.close();
audioContext = null;
}
stopCurrentAudio();
startButton.textContent = 'Begin Call';
isListening = false;
addLog('System: Stopped listening.');
cancelAnimationFrame(animationId);
addLog('System: Microphone closed');
}
// Wire up the UI controls and kick off model loading immediately.
startButton.addEventListener('click', toggleListening);
clearLogsButton.addEventListener('click', () => {
logsDiv.innerHTML = '';
});
createVisualizer();
initializePipelines();
</script>
</body>
</html>