Update index.html
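This commit swaps the in-browser LLM from a Transformers.js text-generation pipeline (Xenova/Qwen1.5-0.5B-Chat) to WebLLM running TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k: it imports @mlc-ai/web-llm, replaces llmPipeline with an llmEngine built by webllm.CreateEngine (logging download progress), rewrites the LLM call in processSpeech() against the OpenAI-style chat.completions API, updates the model-info label, and makes the start button read "End Call" once listening begins.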
index.html  CHANGED  (+25 -17)
@@ -161,7 +161,7 @@
                 <option value="quality">Highest Quality</option>
             </select>
             <div id="model-info">
-                TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/Qwen1.5-0.5B-Chat
+                TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k
            </div>
        </div>
        <div id="visualizer"></div>
@@ -176,6 +176,7 @@
 
    <script type="module">
        import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
+       import * as webllm from 'https://esm.run/@mlc-ai/web-llm';
 
        env.localModelPath = './models';
 
@@ -198,7 +199,7 @@
        let myvad;
        let sttPipeline;
        let ttsPipeline;
-       let llmPipeline;
+       let llmEngine;
        let audioContext;
        let analyser;
        let dataArray;
@@ -234,13 +235,23 @@
        async function initializePipelines() {
            try {
                addLog('System: Initializing pipelines...');
-               [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
+               [sttPipeline, ttsPipeline] = await Promise.all([
                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
-                   pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
-                   pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat', { quantized: true })
+                   pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true })
                ]);
+
+               const initProgressCallback = (report) => {
+                   addLog(`System: ${report.text}`);
+               };
+               const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k";
+               llmEngine = await webllm.CreateEngine(
+                   selectedModel,
+                   {
+                       initProgressCallback: initProgressCallback
+                   }
+               );
 
-               addLog('System: Digital Human Voice Chat initialized with
+               addLog('System: Digital Human Voice Chat initialized with WebLLM. Click "Begin Call" to start.');
                startButton.disabled = false;
                loadingDiv.style.display = 'none';
            } catch (error) {
@@ -252,24 +263,21 @@
 
        async function processSpeech(audio) {
            try {
-               if (!sttPipeline || !ttsPipeline || !llmPipeline) {
+               if (!sttPipeline || !ttsPipeline || !llmEngine) {
                    throw new Error('Pipelines not initialized');
                }
 
                const transcription = await sttPipeline(audio);
                addLog(`User: ${transcription.text}`);
 
-               const messages = [
-                   { role: 'system', content: 'You are a helpful assistant.' },
-                   { role: 'user', content: transcription.text }
-               ];
-
-               const llmResponse = await llmPipeline(messages, {
-                   max_new_tokens: 128,
-                   do_sample: false
+               const reply = await llmEngine.chat.completions.create({
+                   messages: [
+                       { role: 'system', content: 'You are a helpful assistant.' },
+                       { role: 'user', content: transcription.text }
+                   ]
                });
 
-               const botResponse =
+               const botResponse = reply.choices[0].message.content;
                addLog(`Bot: ${botResponse}`);
 
                isSpeaking = true;
@@ -397,7 +405,7 @@
            });
 
            await myvad.start();
-
+           startButton.textContent = 'End Call';
            isListening = true;
            addLog('System: Listening...');
        } catch (error) {
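For reference, here is the new LLM path as one self-contained snippet, assembled only from lines added in this commit (import URL, model ID, the CreateEngine call, and the choices[0].message.content read are all the diff's own; console.log stands in for the Space's addLog helper). Later @mlc-ai/web-llm releases rename CreateEngine to CreateMLCEngine, so this sketch assumes the build the commit was written against:

import * as webllm from 'https://esm.run/@mlc-ai/web-llm';

// Progress reports carry a human-readable text field (the Space logs report.text).
const llmEngine = await webllm.CreateEngine(
    'TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k',
    { initProgressCallback: (report) => console.log(report.text) }
);

// OpenAI-style chat completion; the reply text is read exactly as
// processSpeech() does: reply.choices[0].message.content.
const reply = await llmEngine.chat.completions.create({
    messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: 'Say hello in one short sentence.' }
    ]
});
console.log(reply.choices[0].message.content);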
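The speech stack is untouched by this commit: both pipelines still come from Transformers.js. A minimal sketch of that unchanged side follows; the ASR result shape ({ text }) is visible in the diff, while the TTS result shape ({ audio, sampling_rate }) is the library's documented output for text-to-speech pipelines and is an assumption here, since the playback code is outside the changed hunks:

import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';

// Same models and options as initializePipelines() above.
const sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true });
const ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true });

// Whisper expects 16 kHz mono samples; one second of silence stands in for VAD output.
const audio = new Float32Array(16000);
const transcription = await sttPipeline(audio);   // -> { text: '...' }

// MMS-TTS returns a raw waveform to feed the page's AudioContext:
// speech.audio is a Float32Array of samples, speech.sampling_rate its sample rate.
const speech = await ttsPipeline(transcription.text);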