// digital.human.audio.js
import * as webllm from "https://esm.run/@mlc-ai/web-llm";

// Ensure the script runs after the DOM is fully loaded
document.addEventListener("DOMContentLoaded", () => {
  // ===== Shared state and small helpers =====

  /** Text shown in the assistant bubble until the first streamed token arrives. */
  const VOICE_TYPING_PLACEHOLDER = "typing...";

  /**
   * Shorthand for document.getElementById.
   * @param {string} id - Element id.
   * @returns {HTMLElement|null} The element, or null when it does not exist.
   */
  const voiceEl = (id) => document.getElementById(id);

  // Conversation history sent to the engine on every request. It starts with
  // the system prompt; user and assistant turns are appended as they happen.
  const voiceMessages = [
    {
      content: "You are Aged Guru, an intelligent assistant skilled in digital human voice interactions. Provide insightful and comprehensive answers using human-like voice responses.",
      role: "system"
    }
  ];

  const voiceAvailableModels = webllm.prebuiltAppConfig.model_list.map(
    (m) => m.model_id
  );

  let voiceSelectedModel = "gemma-2-2b-it-q4f16_1-MLC-1k"; // Default model
  let voiceIsGenerating = false; // Flag to prevent multiple generations
  let voiceLastInputWasVoice = false; // Flag to track the last input method

  // ===== Logging Function =====

  /**
   * Logs messages to the #voice-logs container.
   * @param {string} message - The message to log.
   * @param {string} type - The type of message: 'user', 'assistant', 'system', 'error'.
   */
  function logMessage(message, type) {
    const voiceLogs = voiceEl("voice-logs");
    const logEntry = document.createElement("div");
    logEntry.classList.add("log-entry");
    logEntry.textContent = `[${type.toUpperCase()}] ${message}`;

    // Style log entries based on type
    switch (type) {
      case "user":
        logEntry.style.color = "#00796B";
        break;
      case "assistant":
        logEntry.style.color = "#004D40";
        break;
      case "system":
        logEntry.style.color = "#555555";
        break;
      case "error":
        logEntry.style.color = "#E53935";
        break;
      default:
        logEntry.style.color = "#000000";
    }

    voiceLogs.appendChild(logEntry);
    voiceLogs.scrollTop = voiceLogs.scrollHeight;
  }

  // ===== Engine setup =====

  /**
   * Progress callback handed to the engine; mirrors progress to the console
   * and to the on-page log.
   * @param {{progress: number, text: string}} report - Progress report from the engine.
   */
  function voiceUpdateEngineInitProgressCallback(report) {
    console.log("Digital Human Voice Initialize", report.progress);
    // Instead of updating a status span, log the progress
    logMessage(`Model Initialization Progress: ${report.text}`, "system");
  }

  const voiceEngine = new webllm.MLCEngine();
  voiceEngine.setInitProgressCallback(voiceUpdateEngineInitProgressCallback);

  /**
   * Loads the model currently chosen in #voice-model-selection and, on
   * success, enables the chat controls.
   * Errors are reported to the user here and are not rethrown.
   * @returns {Promise<boolean>} true when the model loaded, false when loading failed.
   */
  async function voiceInitializeWebLLMEngine() {
    logMessage("Model initialization started.", "system");
    voiceEl("voice-loading-spinner").classList.remove("hidden"); // Show spinner
    voiceSelectedModel = voiceEl("voice-model-selection").value;
    const config = {
      temperature: 0.7, // Adjusted for more precise answers
      top_p: 0.9
    };
    try {
      await voiceEngine.reload(voiceSelectedModel, config);
      voiceEl("voice-selected-model").textContent = voiceSelectedModel;
      voiceEl("voice-start_button").disabled = false;
      voiceEl("voice-text-input").disabled = false; // Enable text input after initialization
      voiceEl("voice-submit-button").disabled = false; // Enable submit button after initialization
      voiceEl("voice-speech-controls").disabled = false; // Enable speech controls after initialization
      voiceEl("voice-configuration").classList.remove("hidden");
      logMessage("Model initialized successfully.", "system");
      return true;
    } catch (error) {
      console.error("Error initializing the model:", error);
      alert("Failed to initialize the model. Please try again.");
      logMessage("Failed to initialize the model.", "error");
      return false;
    } finally {
      voiceEl("voice-loading-spinner").classList.add("hidden"); // Hide spinner
    }
  }

  /**
   * Streams a chat completion for `messages`.
   * Only one generation may run at a time: when one is already running this
   * function warns and returns WITHOUT invoking any callback, so callers
   * should check `voiceIsGenerating` before touching the UI.
   * @param {Array<{content: string, role: string}>} messages - Conversation history.
   * @param {(partial: string) => void} onUpdate - Called with the accumulated text after each non-empty delta.
   * @param {(finalMessage: string) => void} onFinish - Called once with the engine's final message.
   * @param {(err: unknown) => void} onError - Called when the request or a callback throws.
   * @returns {Promise<void>}
   */
  async function voiceStreamingGenerating(messages, onUpdate, onFinish, onError) {
    if (voiceIsGenerating) {
      console.warn("Voice Generation already in progress.");
      return;
    }
    voiceIsGenerating = true;
    try {
      let curMessage = "";
      const completion = await voiceEngine.chat.completions.create({
        stream: true,
        messages
      });
      for await (const chunk of completion) {
        const curDelta = chunk.choices[0].delta.content;
        if (curDelta) {
          curMessage += curDelta;
          // Only repaint when there is text, so the placeholder stays visible
          // until the first token arrives.
          onUpdate(curMessage);
        }
      }
      const finalMessage = await voiceEngine.getMessage();
      console.log(`Voice Generated final message: ${finalMessage}`); // Debugging
      onFinish(finalMessage);
      logMessage("Response generated successfully.", "system");
    } catch (err) {
      console.error(err);
      onError(err);
      logMessage("An error occurred during response generation.", "error");
    } finally {
      voiceIsGenerating = false;
    }
  }

  // ===== Chat box rendering =====

  /**
   * Appends a chat bubble to #voice-chat-box and scrolls it into view.
   * Assistant messages (other than the typing placeholder) are also spoken
   * when the last user input arrived by voice.
   * @param {{content: string, role: string}} message - Message to render.
   */
  function voiceAppendMessage(message) {
    console.log(`Voice Appending message: ${message.content} (Role: ${message.role})`); // Debugging
    const voiceChatBox = voiceEl("voice-chat-box");

    const container = document.createElement("div");
    container.classList.add("message-container");
    container.classList.add(message.role === "user" ? "user" : "assistant");

    const newMessage = document.createElement("div");
    newMessage.classList.add("message");
    newMessage.textContent = message.content;

    container.appendChild(newMessage);
    voiceChatBox.appendChild(container);
    voiceChatBox.scrollTop = voiceChatBox.scrollHeight;

    // Only trigger TTS for assistant messages if the last input was via voice
    if (
      message.role === "assistant" &&
      message.content !== VOICE_TYPING_PLACEHOLDER &&
      voiceLastInputWasVoice
    ) {
      voiceSpeak(message.content);
    }
  }

  /**
   * Replaces the text of the last chat bubble (used while streaming).
   * Does nothing when the chat box holds no bubbles.
   * @param {string} content - New text for the last bubble.
   */
  function voiceUpdateLastMessage(content) {
    const messageDoms = voiceEl("voice-chat-box").querySelectorAll(".message");
    const lastMessageDom = messageDoms[messageDoms.length - 1];
    if (lastMessageDom) {
      lastMessageDom.textContent = content;
    }
  }

  /**
   * Removes the trailing assistant bubble (the placeholder / streamed text of
   * the request in flight) from the chat box.
   * @param {boolean} onlyIfPlaceholder - When true, remove it only if it still
   *   shows the typing placeholder, so partially streamed text is kept.
   */
  function voiceRemovePendingAssistantBubble(onlyIfPlaceholder) {
    const voiceChatBox = voiceEl("voice-chat-box");
    const lastContainer = voiceChatBox.lastElementChild;
    if (!lastContainer || !lastContainer.classList.contains("assistant")) {
      return;
    }
    const bubble = lastContainer.querySelector(".message");
    const isPlaceholder = Boolean(bubble) && bubble.textContent === VOICE_TYPING_PLACEHOLDER;
    if (onlyIfPlaceholder && !isPlaceholder) {
      return;
    }
    voiceChatBox.removeChild(lastContainer);
  }

  /**
   * Enables or disables the two controls that start a new request.
   * @param {boolean} disabled - true to disable, false to enable.
   */
  function voiceSetInputControlsDisabled(disabled) {
    voiceEl("voice-start_button").disabled = disabled;
    voiceEl("voice-submit-button").disabled = disabled;
  }

  // ===== Request pipeline shared by voice and text input =====

  /**
   * Sends one user turn to the model and renders the streamed reply.
   * @param {string} input - Trimmed, non-empty user text.
   * @param {boolean} viaVoice - true when the text came from speech recognition;
   *   controls whether the reply is spoken.
   * @returns {boolean} false when the turn was rejected because another
   *   generation is still running, true otherwise.
   */
  function voiceSubmitUserMessage(input, viaVoice) {
    if (voiceIsGenerating) {
      // Rejecting here (before any UI change) avoids orphaned placeholders and
      // controls that are disabled but never re-enabled.
      console.warn("Voice Generation already in progress.");
      logMessage("A response is still being generated. Please wait.", "error");
      return false;
    }

    voiceLastInputWasVoice = viaVoice;
    console.log(viaVoice ? `Voice input received: ${input}` : `Voice Text input received: ${input}`); // Debugging

    voiceSetInputControlsDisabled(true); // Disable to prevent multiple submissions

    const message = { content: input, role: "user" };
    voiceMessages.push(message);
    voiceAppendMessage(message);
    logMessage(viaVoice ? `User (Voice): ${input}` : `User: ${input}`, "user");

    // Append "typing..." placeholder
    voiceAppendMessage({ content: VOICE_TYPING_PLACEHOLDER, role: "assistant" });
    logMessage("VoiceBot is typing...", "system");

    const onFinishGenerating = (finalMessage) => {
      console.log(`Voice Finishing generation with message: ${finalMessage}`); // Debugging

      // Swap the placeholder / streamed bubble for the final text so exactly
      // one bubble remains even if the final text differs from the stream.
      voiceRemovePendingAssistantBubble(false);
      const aiMessage = { content: finalMessage, role: "assistant" };
      // Keep the reply in the history so later turns have full context.
      voiceMessages.push(aiMessage);
      voiceAppendMessage(aiMessage); // Also speaks the reply for voice input
      logMessage(`VoiceBot: ${finalMessage}`, "assistant");

      voiceSetInputControlsDisabled(false); // Re-enable after processing

      voiceEngine.runtimeStatsText()
        .then((statsText) => {
          const stats = voiceEl("voice-chat-stats");
          stats.classList.remove("hidden");
          stats.textContent = statsText;
          logMessage(`Runtime Stats: ${statsText}`, "system");
        })
        .catch((err) => {
          console.error("Unable to retrieve runtime stats:", err);
          logMessage("Unable to retrieve runtime stats.", "error");
        });
    };

    const onErrorGenerating = (err) => {
      console.error(err);
      // Do not leave a "typing..." bubble behind when nothing was streamed.
      voiceRemovePendingAssistantBubble(true);
      alert("An error occurred while generating the response. Please try again.");
      logMessage("Error during response generation.", "error");
      voiceSetInputControlsDisabled(false);
    };

    voiceStreamingGenerating(
      voiceMessages,
      voiceUpdateLastMessage,
      onFinishGenerating,
      onErrorGenerating
    ).catch((err) => console.error(err)); // Only reachable if a callback throws

    return true;
  }

  /**
   * Handles a finished speech-recognition transcript.
   * @param {string} transcript - Recognized text; blank transcripts are ignored.
   */
  function voiceOnSpeechRecognized(transcript) {
    const input = transcript.trim();
    if (input.length === 0) {
      return;
    }
    voiceSubmitUserMessage(input, true);
  }

  // ===== Text Input Handling =====

  /** Submits the content of #voice-text-input; blank input is ignored. */
  function voiceHandleTextSubmit() {
    const textInput = voiceEl("voice-text-input");
    const input = textInput.value.trim();
    if (input.length === 0) {
      return;
    }
    // Clear the field only when the turn was accepted, so text typed while a
    // reply is still streaming is not lost.
    if (voiceSubmitUserMessage(input, false)) {
      textInput.value = "";
    }
  }

  // Event Listener for Submit Button
  voiceEl("voice-submit-button").addEventListener("click", () => {
    voiceHandleTextSubmit();
  });

  // Event Listener for Enter Key in Text Input
  voiceEl("voice-text-input").addEventListener("keypress", (e) => {
    if (e.key === "Enter") {
      voiceHandleTextSubmit();
    }
  });

  // ===== Speech Recognition =====

  let voiceRecognizing = false;
  let voiceIgnore_onend = false;
  let voiceFinal_transcript = "";
  let voiceRecognition;

  /**
   * Toggles speech recognition: stops it when running, otherwise starts a
   * fresh en-US recognition session.
   */
  function voiceStartButton() {
    if (!voiceRecognition) {
      // Recognition was never created because the browser lacks the API.
      logMessage("Web Speech API not supported by this browser.", "error");
      return;
    }
    if (voiceRecognizing) {
      voiceRecognition.stop();
      return;
    }
    voiceFinal_transcript = "";
    voiceRecognition.lang = "en-US";
    voiceRecognition.start();
    voiceIgnore_onend = false;
    voiceEl("voice-start_button").classList.add("mic-animate");
    logMessage("Voice input started.", "system");
  }

  // Prefer the unprefixed constructor and fall back to the WebKit-prefixed one.
  const VoiceSpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

  if (!VoiceSpeechRecognition) {
    alert("Web Speech API is not supported by this browser.");
    logMessage("Web Speech API not supported by this browser.", "error");
  } else {
    voiceRecognition = new VoiceSpeechRecognition();
    voiceRecognition.continuous = false; // Non-continuous recognition
    voiceRecognition.interimResults = false; // Get only final results

    voiceRecognition.onstart = () => {
      voiceRecognizing = true;
      logMessage("Speech recognition started.", "system");
    };

    voiceRecognition.onerror = (event) => {
      const startButton = voiceEl("voice-start_button");
      switch (event.error) {
        case "no-speech":
          startButton.classList.remove("mic-animate");
          alert("No speech was detected in Voice section.");
          logMessage("No speech detected.", "error");
          voiceIgnore_onend = true;
          break;
        case "audio-capture":
          startButton.classList.remove("mic-animate");
          alert("No microphone was found in Voice section.");
          logMessage("No microphone found.", "error");
          voiceIgnore_onend = true;
          break;
        case "not-allowed":
          alert("Permission to use microphone was denied in Voice section.");
          logMessage("Microphone permission denied.", "error");
          voiceIgnore_onend = true;
          break;
        default:
          // Other errors fall through to onend, which reports the missing
          // transcript; record the cause for diagnosis.
          logMessage(`Speech recognition error: ${event.error}`, "error");
      }
    };

    voiceRecognition.onend = () => {
      voiceRecognizing = false;
      voiceEl("voice-start_button").classList.remove("mic-animate");
      logMessage("Speech recognition ended.", "system");

      if (voiceIgnore_onend) {
        return;
      }
      if (!voiceFinal_transcript) {
        logMessage("No transcript captured.", "error");
        return;
      }
      // Process the final transcript
      voiceOnSpeechRecognized(voiceFinal_transcript);
    };

    voiceRecognition.onresult = (event) => {
      for (let i = event.resultIndex; i < event.results.length; ++i) {
        if (event.results[i].isFinal) {
          voiceFinal_transcript += event.results[i][0].transcript;
        }
      }
      voiceFinal_transcript = voiceFinal_transcript.trim();
      logMessage(`Recognized Speech: ${voiceFinal_transcript}`, "user");
    };
  }

  voiceEl("voice-start_button").addEventListener("click", () => {
    voiceStartButton();
  });

  // ===== Model selection and download =====

  const voiceModelSelect = voiceEl("voice-model-selection");
  voiceAvailableModels.forEach((modelId) => {
    const option = document.createElement("option");
    option.value = modelId;
    option.textContent = modelId;
    voiceModelSelect.appendChild(option);
  });
  voiceModelSelect.value = voiceSelectedModel;

  // Enable the Download Model button after models are loaded
  const voiceDownloadButton = voiceEl("voice-download");
  voiceDownloadButton.disabled = false;

  voiceDownloadButton.addEventListener("click", async () => {
    logMessage("Model download initiated.", "system");
    // Block repeated clicks while a model is loading.
    voiceDownloadButton.disabled = true;
    try {
      const initialized = await voiceInitializeWebLLMEngine();
      if (!initialized) {
        return; // Leave the chat controls disabled when loading failed
      }
      voiceEl("voice-start_button").disabled = false;
      // Enable speech controls after model initialization
      voiceEl("voice-speech-rate").disabled = false;
      voiceEl("voice-speech-pitch").disabled = false;
    } finally {
      voiceDownloadButton.disabled = false;
    }
  });

  voiceEl("voice-clear-logs").addEventListener("click", () => {
    voiceEl("voice-logs").innerHTML = "";
    logMessage("Logs cleared.", "system");
  });

  // ===== TTS Integration =====

  const voiceTtsSupported =
    "speechSynthesis" in window && typeof SpeechSynthesisUtterance !== "undefined";

  // Holder for the selected voice / rate / pitch. It is never spoken itself;
  // voiceSpeak() creates a fresh utterance per sentence.
  const voiceSpeech = voiceTtsSupported
    ? new SpeechSynthesisUtterance()
    : { voice: null, rate: 1, pitch: 1 };
  voiceSpeech.lang = "en";
  let voiceVoices = [];

  /**
   * Rebuilds the #voice-tools <select> from `voiceVoices` and restores the
   * voice index saved in localStorage (falls back to the first voice).
   */
  function voicePopulateVoices() {
    const voiceSelect = voiceEl("voice-tools");
    voiceSelect.innerHTML = ""; // Clear existing options
    voiceVoices.forEach((voice, i) => {
      voiceSelect.appendChild(new Option(voice.name, i));
    });

    if (voiceVoices.length > 0) {
      const savedVoice = localStorage.getItem("voiceSelectedVoice");
      if (savedVoice !== null && voiceVoices[savedVoice]) {
        voiceSpeech.voice = voiceVoices[savedVoice];
        voiceSelect.value = savedVoice;
      } else {
        voiceSpeech.voice = voiceVoices[0];
      }
    }
  }

  if (voiceTtsSupported) {
    // Use addEventListener instead of directly assigning to onvoiceschanged
    window.speechSynthesis.addEventListener("voiceschanged", () => {
      voiceVoices = window.speechSynthesis.getVoices();
      voicePopulateVoices();
    });

    // Some browsers expose the voice list synchronously and may not fire
    // "voiceschanged", so try once up front as well.
    voiceVoices = window.speechSynthesis.getVoices();
    if (voiceVoices.length > 0) {
      voicePopulateVoices();
    }
  }

  // Voice Selection Event Listener
  voiceEl("voice-tools").addEventListener("change", (e) => {
    const selectedVoiceIndex = e.target.value;
    const selectedVoice = voiceVoices[selectedVoiceIndex];
    if (!selectedVoice) {
      return;
    }
    voiceSpeech.voice = selectedVoice;
    // Save to localStorage
    localStorage.setItem("voiceSelectedVoice", selectedVoiceIndex);
    logMessage(`Voice changed to: ${selectedVoice.name}`, "system");
  });

  /**
   * Splits text into sentence-sized chunks for speech synthesis.
   * A chunk ends at a run of '.', '!' or '?'; trailing text without terminal
   * punctuation is kept as its own chunk. Blank chunks are dropped.
   * @param {string} text - Text to split.
   * @returns {string[]} Trimmed, non-empty chunks (empty array for blank text).
   */
  function voiceSplitSentences(text) {
    const chunks = (text.match(/[^.!?]+[.!?]*/g) || [])
      .map((chunk) => chunk.trim())
      .filter((chunk) => chunk.length > 0);
    if (chunks.length > 0) {
      return chunks;
    }
    // Text made only of punctuation does not match above; speak it as-is.
    const whole = text.trim();
    return whole.length > 0 ? [whole] : [];
  }

  /**
   * Speaks `text` sentence by sentence with the selected voice and the
   * current rate/pitch slider values. Shows the spinner and enables the Stop
   * button until the last sentence has been spoken.
   * @param {string} text - Text to speak; blank text is ignored.
   */
  function voiceSpeak(text) {
    if (!window.speechSynthesis) {
      console.warn("Speech Synthesis not supported in this browser for Voice section.");
      logMessage("Speech Synthesis not supported in this browser.", "error");
      return;
    }

    const sentences = voiceSplitSentences(text);
    if (sentences.length === 0) {
      return;
    }

    // Show spinner and enable Stop button
    voiceEl("voice-loading-spinner").classList.remove("hidden");
    voiceEl("voice-stop_button").disabled = false;
    logMessage("TTS started.", "system");

    // Retrieve the currently selected voice and slider settings once; they
    // apply to every sentence of this call.
    const selectedVoice = voiceSpeech.voice;
    const rate = parseFloat(voiceEl("voice-speech-rate").value);
    const pitch = parseFloat(voiceEl("voice-speech-pitch").value);

    let utterancesCount = sentences.length;

    const finishTts = () => {
      voiceEl("voice-loading-spinner").classList.add("hidden");
      voiceEl("voice-stop_button").disabled = true;
    };

    sentences.forEach((sentence) => {
      const utterance = new SpeechSynthesisUtterance(sentence);

      // Assign the selected voice to the utterance
      if (selectedVoice) {
        utterance.voice = selectedVoice;
      }
      utterance.rate = rate; // Adjust the speaking rate (0.1 to 10)
      utterance.pitch = pitch; // Adjust the pitch (0 to 2)

      // Add event listeners for debugging or additional functionality
      utterance.onstart = () => {
        console.log("Speech started:", sentence);
        logMessage(`TTS started: ${sentence}`, "system");
      };

      utterance.onend = () => {
        console.log("Speech ended:", sentence);
        logMessage(`TTS ended: ${sentence}`, "system");
        utterancesCount--;
        if (utterancesCount === 0) {
          // Hide spinner and disable Stop button when all utterances have been spoken
          finishTts();
          logMessage("All TTS messages have been spoken.", "system");
        }
      };

      utterance.onerror = (e) => {
        // cancel() reports removed utterances as "canceled"/"interrupted";
        // that is the Stop button working, not a failure.
        if (e.error === "canceled" || e.error === "interrupted") {
          return;
        }
        console.error("Speech Synthesis Error:", e);
        alert("An error occurred during speech synthesis. Please try again.");
        logMessage("Speech synthesis encountered an error.", "error");
        utterancesCount = 0;
        finishTts();
      };

      window.speechSynthesis.speak(utterance);
    });
  }

  // ===== Stop Speech Functionality =====

  /**
   * Stops any ongoing speech synthesis.
   */
  function voiceStopSpeech() {
    if (window.speechSynthesis && window.speechSynthesis.speaking) {
      window.speechSynthesis.cancel();
      voiceEl("voice-loading-spinner").classList.add("hidden");
      voiceEl("voice-stop_button").disabled = true;
      logMessage("Speech synthesis stopped by user.", "system");
    }
  }

  // Event Listener for Stop Button
  voiceEl("voice-stop_button").addEventListener("click", () => {
    voiceStopSpeech();
  });

  // ===== Persisting User Preferences =====

  // Load Preferences on Initialization
  window.addEventListener("load", () => {
    const savedVoice = localStorage.getItem("voiceSelectedVoice");
    if (savedVoice !== null && voiceVoices[savedVoice]) {
      voiceEl("voice-tools").value = savedVoice;
      voiceSpeech.voice = voiceVoices[savedVoice];
      logMessage(`Loaded saved voice: ${voiceVoices[savedVoice].name}`, "system");
    }

    const savedRate = localStorage.getItem("voiceSpeechRate");
    if (savedRate !== null) {
      voiceEl("voice-speech-rate").value = savedRate;
      voiceSpeech.rate = parseFloat(savedRate);
      logMessage(`Loaded saved speech rate: ${savedRate}`, "system");
    }

    const savedPitch = localStorage.getItem("voiceSpeechPitch");
    if (savedPitch !== null) {
      voiceEl("voice-speech-pitch").value = savedPitch;
      voiceSpeech.pitch = parseFloat(savedPitch);
      logMessage(`Loaded saved speech pitch: ${savedPitch}`, "system");
    }
  });

  // Save Speech Rate
  voiceEl("voice-speech-rate").addEventListener("input", (e) => {
    const rate = e.target.value;
    voiceSpeech.rate = parseFloat(rate);
    localStorage.setItem("voiceSpeechRate", rate);
    logMessage(`Speech rate changed to: ${rate}`, "system");
  });

  // Save Speech Pitch
  voiceEl("voice-speech-pitch").addEventListener("input", (e) => {
    const pitch = e.target.value;
    voiceSpeech.pitch = parseFloat(pitch);
    localStorage.setItem("voiceSpeechPitch", pitch);
    logMessage(`Speech pitch changed to: ${pitch}`, "system");
  });

  // ===== TTS Integration Continued =====

  // Optional: Global Listener to Detect When All Speech Has Finished
  // NOTE(review): the Web Speech API documents "end" on individual utterances,
  // not on window.speechSynthesis, so this listener is presumably never
  // invoked. Completion is already handled per utterance in voiceSpeak().
  // Confirm, then remove.
  if (voiceTtsSupported) {
    window.speechSynthesis.addEventListener("end", () => {
      console.log("All voice speech has been spoken.");
      logMessage("All TTS messages have been spoken.", "system");
      // Ensure Stop button is disabled after speech ends
      voiceEl("voice-stop_button").disabled = true;
    });
  }
});