<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Whisper WAV Transcription</title>
    <style>
        body {
            font-family: sans-serif;
            display: flex;
            flex-direction: column;
            align-items: center;
            padding: 20px;
        }
        #container {
            width: 80%;
            border: 1px solid #ccc;
            padding: 20px;
            border-radius: 5px;
        }
        input[type="text"], input[type="file"], button {
            padding: 10px;
            margin: 10px 0;
            width: calc(100% - 22px);
            box-sizing: border-box;
            border: 1px solid #ccc;
            border-radius: 3px;
        }
        button {
            background-color: #4CAF50;
            color: white;
            cursor: pointer;
        }
        button:disabled {
            background-color: #ccc;
            cursor: not-allowed;
        }
        #transcription {
            margin-top: 20px;
            padding: 15px;
            border: 1px solid #ccc;
            border-radius: 5px;
            white-space: pre-wrap;
            width: calc(100% - 32px); /* Adjust width for padding */
            box-sizing: border-box;
        }
    </style>
</head>
</head> | |
<body> | |
<div id="container"> | |
<h1>Whisper WAV Transcription</h1> | |
<h3>API Key</h3> | |
<input type="text" id="apiKey"> <!-- Hidden input for API key --> | |
<h3>API base url (with '.../v1/' at the end)</h3> | |
<input type="text" id="apiBaseUrl"> <!-- Hidden input for API base URL --> | |
<label for="audioFile">Select WAV File:</label> | |
<input type="file" id="audioFile" accept=".wav"> | |
<button id="transcribeButton" onclick="transcribeAudio()">Transcribe</button> | |
<div id="transcription"></div> | |
<button id="copyButton" onclick="copyToClipboard()" style="display: none;">Copy to Clipboard</button> | |
</div> | |
<script> | |
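        // Flow: read the API key and base URL from the inputs, split the selected
        // WAV file into chunks small enough for the API's upload limit, send each
        // chunk to the `audio/transcriptions` endpoint, and append the results.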
        // Extract key=value parameters from the URL hash (e.g. #apiKey=...&apiBaseUrl=...)
        function getHashParams() {
            const hash = window.location.hash.substring(1);
            const params = {};
            hash.split('&').forEach(part => {
                const [key, value] = part.split('=');
                if (key && value) {
                    params[key] = value;
                }
            });
            return params;
        }
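        // A minimal sketch (not wired up in the original script) that uses getHashParams()
        // to prefill the two text inputs from the URL hash; the apiKey/apiBaseUrl parameter
        // names are an assumption based on the input ids above.
        window.addEventListener('DOMContentLoaded', () => {
            const params = getHashParams();
            if (params.apiKey) document.getElementById('apiKey').value = decodeURIComponent(params.apiKey);
            if (params.apiBaseUrl) document.getElementById('apiBaseUrl').value = decodeURIComponent(params.apiBaseUrl);
        });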
        async function transcribeAudio() {
            const apiKey = document.getElementById('apiKey').value;
            const apiBaseUrl = document.getElementById('apiBaseUrl').value;
            const audioFile = document.getElementById('audioFile').files[0];
            const transcriptionDiv = document.getElementById('transcription');
            if (!apiKey || !apiBaseUrl || !audioFile) {
                alert('Please provide the API key, the API base URL, and a WAV file.');
                return;
            }
            const transcribeButton = document.getElementById('transcribeButton');
            transcribeButton.disabled = true;
            transcribeButton.textContent = 'Transcribing...';
            transcriptionDiv.textContent = 'Transcribing... Please wait.';
            try {
                const chunks = await splitAudioFile(audioFile);
                let fullTranscription = '';
                for (let i = 0; i < chunks.length; i++) {
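                    // Each chunk is uploaded as multipart/form-data. The 'model' value
                    // must match what the target server expects; OpenAI's hosted API,
                    // for example, uses 'whisper-1' rather than 'whisper'.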
                    const formData = new FormData();
                    formData.append('file', chunks[i], `chunk_${i + 1}.wav`);
                    formData.append('model', 'whisper');
                    const response = await fetch(`${apiBaseUrl}audio/transcriptions`, {
                        method: 'POST',
                        headers: {
                            'Authorization': `Bearer ${apiKey}`
                        },
                        body: formData
                    });
                    if (!response.ok) {
                        // The error body may not be JSON, so fall back to the status text.
                        let message = response.statusText;
                        try {
                            const errorData = await response.json();
                            message = errorData.error?.message || message;
                        } catch (_) {}
                        throw new Error(`API error: ${message}`);
                    }
                    const data = await response.json();
                    fullTranscription += (fullTranscription ? ' ' : '') + data.text.trim();
                    transcriptionDiv.textContent = fullTranscription; // Show progress after each chunk
                }
                transcriptionDiv.textContent = fullTranscription;
                document.getElementById('copyButton').style.display = 'block'; // Show copy button
            } catch (error) {
                console.error('Error during transcription:', error);
                transcriptionDiv.textContent = `Error: ${error.message}`;
            } finally {
                transcribeButton.disabled = false;
                transcribeButton.textContent = 'Transcribe';
            }
        }
        // Audio chunking helpers
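        // Strategy: decode the uploaded WAV with the Web Audio API, slice the decoded
        // samples into chunks that stay under the request size limit, and re-encode
        // each chunk as a standalone 16-bit PCM WAV file before uploading.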
        async function splitAudioFile(file) {
            const audioContext = new (window.AudioContext || window.webkitAudioContext)();
            const reader = new FileReader();
            return new Promise((resolve, reject) => {
                reader.onload = async function (event) {
                    try {
                        const audioBuffer = await audioContext.decodeAudioData(event.target.result);
                        const chunks = [];
                        const chunkSize = 24 * 1024 * 1024; // Target ~24 MB of PCM data per chunk, below the API's ~25 MB upload limit
                        const sampleRate = audioBuffer.sampleRate;
                        const bytesPerSample = 2; // Chunks are re-encoded as 16-bit PCM
                        const samplesPerChunk = Math.floor(chunkSize / (bytesPerSample * audioBuffer.numberOfChannels));
                        const chunksCount = Math.ceil(audioBuffer.length / samplesPerChunk);
                        for (let i = 0; i < chunksCount; i++) {
                            const startSample = i * samplesPerChunk;
                            const endSample = Math.min((i + 1) * samplesPerChunk, audioBuffer.length);
                            const chunkBuffer = audioContext.createBuffer(
                                audioBuffer.numberOfChannels,
                                endSample - startSample,
                                sampleRate
                            );
                            for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
                                const chunkChannelData = chunkBuffer.getChannelData(channel);
                                audioBuffer.copyFromChannel(chunkChannelData, channel, startSample);
                            }
                            const wavBlob = bufferToWav(chunkBuffer);
                            chunks.push(new File([wavBlob], `chunk_${i + 1}.wav`, { type: 'audio/wav' }));
                        }
                        resolve(chunks);
                    } catch (error) {
                        reject(error);
                    }
                };
                reader.onerror = reject;
                reader.readAsArrayBuffer(file);
            });
        }
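        // Encode an AudioBuffer as a 16-bit PCM WAV Blob: interleave the channel data,
        // write the 44-byte RIFF/WAVE header, then append the samples as little-endian
        // signed 16-bit integers.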
        function bufferToWav(buffer) {
            // Interleave the per-channel Float32 samples into a single frame-ordered array
            const interleaved = new Float32Array(buffer.length * buffer.numberOfChannels);
            for (let channel = 0; channel < buffer.numberOfChannels; channel++) {
                const channelData = buffer.getChannelData(channel);
                for (let i = 0; i < buffer.length; i++) {
                    interleaved[i * buffer.numberOfChannels + channel] = channelData[i];
                }
            }
            const wavBuffer = new ArrayBuffer(44 + interleaved.length * 2);
            const view = new DataView(wavBuffer);
            writeString(view, 0, 'RIFF');
            view.setUint32(4, 36 + interleaved.length * 2, true); // RIFF chunk size
            writeString(view, 8, 'WAVE');
            writeString(view, 12, 'fmt ');
            view.setUint32(16, 16, true); // fmt chunk size
            view.setUint16(20, 1, true); // Audio format: PCM
            view.setUint16(22, buffer.numberOfChannels, true);
            view.setUint32(24, buffer.sampleRate, true);
            view.setUint32(28, buffer.sampleRate * buffer.numberOfChannels * 2, true); // Byte rate
            view.setUint16(32, buffer.numberOfChannels * 2, true); // Block align
            view.setUint16(34, 16, true); // Bits per sample
            writeString(view, 36, 'data');
            view.setUint32(40, interleaved.length * 2, true); // data chunk size
            // Convert float samples in [-1, 1] to signed 16-bit PCM
            const floatTo16BitPCM = (output, offset, input) => {
                for (let i = 0; i < input.length; i++, offset += 2) {
                    const s = Math.max(-1, Math.min(1, input[i]));
                    output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                }
            };
            floatTo16BitPCM(view, 44, interleaved);
            return new Blob([wavBuffer], { type: 'audio/wav' });
        }
        function writeString(view, offset, string) {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        }
        function copyToClipboard() {
            const transcriptionText = document.getElementById('transcription').innerText;
            navigator.clipboard.writeText(transcriptionText)
                .then(() => {
                    alert('Transcription copied to clipboard!');
                })
                .catch(err => {
                    console.error('Failed to copy transcription: ', err);
                });
        }
    </script>
</body>
</html>