<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Voice Chat Bot with Advanced Echo Cancellation</title>
    <!-- onnxruntime-web must load before the VAD bundle, which expects the global `ort` object -->
    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #1a1a1a;
            color: #f0f0f0;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
        }
        h1 {
            color: #ffd700;
            text-align: center;
            margin-bottom: 10px;
        }
        .subtitle {
            text-align: center;
            color: #ffd700;
            margin-bottom: 20px;
        }
        #chat-container {
            display: flex;
            flex-direction: column;
            height: 70vh;
        }
        #conversation {
            flex-grow: 1;
            border: 1px solid #444;
            padding: 10px;
            overflow-y: scroll;
            background-color: #2a2a2a;
            border-radius: 5px;
            margin-bottom: 20px;
        }
        #controls {
            display: flex;
            justify-content: center;
            margin-bottom: 20px;
        }
        button {
            font-size: 18px;
            padding: 10px 20px;
            background-color: #ffd700;
            color: #1a1a1a;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            transition: background-color 0.3s;
        }
        button:hover {
            background-color: #ffec8b;
        }
        button:disabled {
            background-color: #666;
            cursor: not-allowed;
        }
        #visualizer {
            width: 100%;
            height: 100px;
            background-color: #2a2a2a;
            border-radius: 5px;
            overflow: hidden;
            margin-bottom: 20px;
        }
        .bar {
            width: 5px;
            height: 100%;
            background-color: #ffd700;
            display: inline-block;
            margin-right: 1px;
        }
        #loading {
            position: fixed;
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            background-color: rgba(0, 0, 0, 0.8);
            display: flex;
            justify-content: center;
            align-items: center;
            z-index: 1000;
        }
        .spinner {
            width: 50px;
            height: 50px;
            border: 5px solid #f3f3f3;
            border-top: 5px solid #ffd700;
            border-radius: 50%;
            animation: spin 1s linear infinite;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        #configuration {
            margin-bottom: 20px;
        }
        select {
            width: 100%;
            padding: 10px;
            font-size: 16px;
            background-color: #2a2a2a;
            color: #f0f0f0;
            border: 1px solid #444;
            border-radius: 5px;
        }
        #model-info {
            margin-top: 10px;
            font-size: 14px;
            color: #aaa;
        }
        #logs {
            background-color: #2a2a2a;
            border: 1px solid #444;
            border-radius: 5px;
            padding: 10px;
            height: 200px;
            overflow-y: scroll;
            font-family: monospace;
            font-size: 14px;
        }
        #clear-logs {
            margin-top: 10px;
            font-size: 14px;
            padding: 5px 10px;
        }
        #localVideo, #remoteVideo {
            display: none;
        }
    </style>
</head>
<body>
    <div id="loading">
        <div class="spinner"></div>
    </div>
    <div class="container">
        <h1>Digital Human Voice Chat</h1>
        <p class="subtitle">For best results, use headphones.</p>
        <div id="chat-container">
            <div id="controls">
                <button id="startButton" disabled>Begin Call</button>
            </div>
            <div id="configuration">
                <select id="configSelect">
                    <option value="fastest">Fastest</option>
                    <option value="balanced">Balanced</option>
                    <option value="quality">Highest Quality</option>
                </select>
                <div id="model-info">
                    TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k
                </div>
            </div>
            <div id="visualizer"></div>
            <div id="conversation"></div>
        </div>
        <h2>Logs</h2>
        <div id="logs"></div>
        <button id="clear-logs">Clear</button>
    </div>
    <!-- Hidden, muted video elements that hold the captured streams -->
    <video id="localVideo" autoplay muted playsinline></video>
    <video id="remoteVideo" autoplay muted playsinline></video>
    <script type="module">
        import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
        // Pinned to an older @mlc-ai/web-llm release that still exports ChatModule;
        // newer releases replaced it with the MLCEngine API.
        import { ChatModule } from 'https://esm.run/@mlc-ai/[email protected]';

        env.localModelPath = './models';
        // Configure the ONNX Runtime WASM backend before any pipeline is created.
        env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/dist/';
        env.backends.onnx.wasm.simd = true;
        env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;
        const conversationDiv = document.getElementById('conversation');
        const startButton = document.getElementById('startButton');
        const visualizer = document.getElementById('visualizer');
        const loadingDiv = document.getElementById('loading');
        const logsDiv = document.getElementById('logs');
        const clearLogsButton = document.getElementById('clear-logs');
        const localVideo = document.getElementById('localVideo');
        const remoteVideo = document.getElementById('remoteVideo');

        let myvad;
        let sttPipeline;
        let ttsPipeline;
        let llmChat;
        let audioContext;
        let analyser;
        let dataArray;
        let bars;
        let animationId;
        let isListening = false;
        let microphoneStream;
        let isSpeaking = false;
        let currentAudioSource = null;
        let rtcConnection = null;
        let rtcLoopbackConnection = null;
        let loopbackStream = new MediaStream();
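        // Frequency-bar visualizer: 64 bars whose heights track the analyser's byte-frequency data.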
        function createVisualizer() {
            const barCount = 64;
            for (let i = 0; i < barCount; i++) {
                const bar = document.createElement('div');
                bar.className = 'bar';
                visualizer.appendChild(bar);
            }
            bars = visualizer.getElementsByClassName('bar');
        }

        function updateVisualizer() {
            analyser.getByteFrequencyData(dataArray);
            for (let i = 0; i < bars.length; i++) {
                const barHeight = dataArray[i] / 2;
                bars[i].style.height = barHeight + 'px';
            }
            // Throttled redraw loop (~20 fps); stopped elsewhere with clearTimeout(animationId).
            animationId = setTimeout(updateVisualizer, 50);
        }
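        // Load the STT and TTS pipelines in parallel, then bring up the in-browser LLM.
        // ChatModule.reload() and its options follow the older web-llm API pinned above.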
        async function initializePipelines() {
            try {
                addLog('System: Initializing pipelines...');
                [sttPipeline, ttsPipeline] = await Promise.all([
                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
                    pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true })
                ]);
                addLog('System: Initializing WebLLM...');
                llmChat = new ChatModule();
                // Sampling options override the model's mlc-chat-config defaults.
                await llmChat.reload('TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k', {
                    temperature: 0.7,
                    max_gen_len: 256
                });
                addLog('System: WebLLM initialized successfully.');
                addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
                startButton.disabled = false;
                loadingDiv.style.display = 'none';
            } catch (error) {
                console.error('Error initializing pipelines:', error);
                addLog(`System: Error initializing pipelines: ${error.message}`);
                loadingDiv.style.display = 'none';
            }
        }
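        // One conversational turn: transcribe the utterance, generate a reply, synthesize it, play it.
        // isSpeaking gates the barge-in logic in the VAD's onSpeechStart callback.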
        async function processSpeech(audio) {
            try {
                if (!sttPipeline || !ttsPipeline || !llmChat) {
                    throw new Error('Pipelines not initialized');
                }
                const transcription = await sttPipeline(audio);
                addLog(`User: ${transcription.text}`);
                const reply = await llmChat.generate(transcription.text);
                const botResponse = reply.trim();
                addLog(`Bot: ${botResponse}`);
                isSpeaking = true;
                const speechOutput = await ttsPipeline(botResponse);
                await playAudio(speechOutput.audio, speechOutput.sampling_rate);
                isSpeaking = false;
            } catch (error) {
                console.error('Error processing speech:', error);
                addLog(`System: Error processing speech: ${error.message}`);
            }
        }
        function addLog(message) {
            const now = new Date();
            const timestamp = now.toLocaleTimeString();
            const logMessage = `[${timestamp}] ${message}`;
            const messageElement = document.createElement('div');
            messageElement.textContent = logMessage;
            logsDiv.appendChild(messageElement);
            logsDiv.scrollTop = logsDiv.scrollHeight;
        }
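        // Play a Float32Array of PCM samples through the analyser (so the visualizer tracks bot
        // speech) to the speakers. mms-tts-eng produces 16 kHz audio, hence the fallback rate.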
        function playAudio(audioArray, sampleRate = 16000) {
            return new Promise((resolve) => {
                const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
                const channelData = audioBuffer.getChannelData(0);
                channelData.set(audioArray);
                const source = audioContext.createBufferSource();
                currentAudioSource = source;
                source.buffer = audioBuffer;
                source.connect(analyser);
                analyser.connect(audioContext.destination);
                source.start();
                source.onended = () => {
                    currentAudioSource = null;
                    resolve();
                };
            });
        }
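        // Barge-in: cut off the bot's current playback as soon as the user starts talking.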
        function stopCurrentAudio() {
            if (currentAudioSource) {
                currentAudioSource.stop();
                currentAudioSource = null;
            }
        }
        async function toggleListening() {
            if (isListening) {
                await stopListening();
            } else {
                await startListening();
            }
        }
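        // Start the capture chain. The microphone stream is routed through a pair of local
        // RTCPeerConnections (a loopback), a common workaround that runs the audio through the
        // browser's WebRTC processing, echo cancellation in particular. The loopback output
        // feeds the visualizer's analyser, while the VAD opens its own echo-cancelled capture.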
        async function startListening() {
            try {
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                analyser = audioContext.createAnalyser();
                analyser.fftSize = 128;
                dataArray = new Uint8Array(analyser.frequencyBinCount);

                // Keep the hidden video elements silent; they exist only to hold streams.
                localVideo.muted = true;
                localVideo.volume = 0;
                remoteVideo.muted = true;
                remoteVideo.volume = 0;

                microphoneStream = await navigator.mediaDevices.getUserMedia({
                    audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: true },
                    video: { width: 1, height: 1 }
                });
                localVideo.srcObject = microphoneStream;
                await localVideo.play();
                console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
                console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());

                // Wire the two in-page peer connections to each other (the loopback).
                const offerOptions = {
                    offerToReceiveAudio: true,
                    offerToReceiveVideo: false,
                };
                rtcConnection = new RTCPeerConnection();
                rtcLoopbackConnection = new RTCPeerConnection();
                rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
                rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
                rtcLoopbackConnection.ontrack = e => e.streams[0].getTracks().forEach(track => loopbackStream.addTrack(track));
                microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));
                const offer = await rtcConnection.createOffer(offerOptions);
                await rtcConnection.setLocalDescription(offer);
                await rtcLoopbackConnection.setRemoteDescription(offer);
                const answer = await rtcLoopbackConnection.createAnswer();
                await rtcLoopbackConnection.setLocalDescription(answer);
                await rtcConnection.setRemoteDescription(answer);

                // Feed the loopback output to the visualizer's analyser.
                const source = audioContext.createMediaStreamSource(loopbackStream);
                source.connect(analyser);

                myvad = await vad.MicVAD.new({
                    // Ask the VAD's own getUserMedia capture for echo cancellation and noise suppression.
                    additionalAudioConstraints: { echoCancellation: true, noiseSuppression: true },
                    onSpeechStart: () => {
                        addLog('--- Voice activity: speech start');
                        updateVisualizer();
                        if (isSpeaking) {
                            addLog('User interrupted. Stopping bot speech.');
                            stopCurrentAudio();
                            isSpeaking = false;
                        }
                    },
                    onSpeechEnd: (audio) => {
                        addLog('--- Voice activity: speech end');
                        clearTimeout(animationId);
                        processSpeech(audio);
                    }
                });
                await myvad.start();
                startButton.textContent = 'End Call';
                isListening = true;
                addLog('System: Listening...');
            } catch (error) {
                console.error('Error starting voice activity:', error);
                addLog(`System: Error starting voice detection: ${error.message}`);
            }
        }
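        // Tear down the VAD, microphone, audio context, and both peer connections, then reset the UI.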
        async function stopListening() {
            stopCurrentAudio();
            clearTimeout(animationId);
            if (myvad) {
                try {
                    await myvad.destroy();
                } catch (error) {
                    console.error('Error stopping voice activity:', error);
                }
                myvad = null;
            }
            if (microphoneStream) {
                microphoneStream.getTracks().forEach(track => track.stop());
                microphoneStream = null;
            }
            if (audioContext) {
                await audioContext.close();
                audioContext = null;
            }
            if (localVideo) {
                localVideo.srcObject = null;
            }
            if (remoteVideo) {
                remoteVideo.srcObject = null;
            }
            if (rtcConnection) {
                rtcConnection.close();
                rtcConnection = null;
            }
            if (rtcLoopbackConnection) {
                rtcLoopbackConnection.close();
                rtcLoopbackConnection = null;
            }
            loopbackStream = new MediaStream();
            startButton.textContent = 'Begin Call';
            isListening = false;
            addLog('System: Stopped listening. Microphone closed.');
        }
        startButton.addEventListener('click', toggleListening);
        clearLogsButton.addEventListener('click', () => {
            logsDiv.innerHTML = '';
        });
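        // Boot: build the visualizer right away; the Begin Call button is enabled once the models finish loading.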
        createVisualizer();
        initializePipelines();
    </script>
</body>
</html>