Whisper / index.html
Almaatla's picture
Update index.html
8d54513 verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Whisper WAV Transcription</title>
<style>
body {
font-family: sans-serif;
display: flex;
flex-direction: column;
align-items: center;
padding: 20px;
}
#container {
width: 80%;
border: 1px solid #ccc;
padding: 20px;
border-radius: 5px;
}
input[type="text"], input[type="file"], button {
padding: 10px;
margin: 10px 0;
width: calc(100% - 22px);
box-sizing: border-box;
border: 1px solid #ccc;
border-radius: 3px;
}
button {
background-color: #4CAF50;
color: white;
cursor: pointer;
}
button:disabled {
background-color: #ccc;
cursor: not-allowed;
}
#transcription {
margin-top: 20px;
padding: 15px;
border: 1px solid #ccc;
border-radius: 5px;
white-space: pre-wrap;
width: calc(100% - 32px); /* Adjust width for padding */
box-sizing: border-box;
}
</style>
</head>
<body>
<div id="container">
<h1>Whisper WAV Transcription</h1>
<h3>API Key</h3>
<input type="text" id="apiKey"> <!-- Visible text input for the API key -->
<h3>API base url (with '.../v1/' at the end)</h3>
<input type="text" id="apiBaseUrl"> <!-- Visible text input for the API base URL -->
<label for="audioFile">Select WAV File:</label>
<input type="file" id="audioFile" accept=".wav">
<button id="transcribeButton" onclick="transcribeAudio()">Transcribe</button>
<div id="transcription"></div>
<button id="copyButton" onclick="copyToClipboard()" style="display: none;">Copy to Clipboard</button>
</div>
<script>
// Function to extract parameters from URL hash
/**
 * Parses the URL fragment (everything after `#`) as `key=value` pairs joined by `&`.
 *
 * Only the first `=` of a pair separates the key from the value, so a value may itself
 * contain `=` (for example a URL with a query string). Keys and values are
 * percent-decoded; text that is not valid percent-encoding is kept as-is. Pairs with an
 * empty key or an empty value are skipped.
 *
 * Nothing is logged: the fragment may carry credentials, which should not end up in
 * the console.
 *
 * @returns {Object<string, string>} Decoded parameters keyed by name.
 */
function getHashParams() {
  // Malformed escapes (e.g. a stray "%") must not abort the whole parse.
  const decode = (text) => {
    try {
      return decodeURIComponent(text);
    } catch {
      return text;
    }
  };

  const hash = window.location.hash.substring(1);
  const params = {};
  for (const part of hash.split('&')) {
    const separatorIndex = part.indexOf('=');
    if (separatorIndex <= 0) {
      continue; // no "=" at all, or nothing before it
    }
    const key = decode(part.slice(0, separatorIndex));
    const value = decode(part.slice(separatorIndex + 1));
    if (key && value) {
      params[key] = value;
    }
  }
  return params;
}
/**
 * Transcribes the selected audio file and renders the text into `#transcription`.
 *
 * Reads the API key, API base URL and file from the page inputs, splits the audio into
 * chunks with `splitAudioFile`, then POSTs each chunk in order to
 * `<base URL>audio/transcriptions` (model `whisper`) with a Bearer token. The text of
 * each chunk is appended to the output as soon as it arrives. Once every chunk is done
 * the copy button is shown. Failures are logged and displayed in the output area, and
 * the Transcribe button is always restored.
 *
 * @returns {Promise<void>} Settles when the run has finished or failed; never rejects
 *   for errors raised while splitting or uploading.
 */
async function transcribeAudio() {
  // Note: the DOM method is `getElementById`; the previous `getElementByID` spelling
  // does not exist and made every call throw before any work was done.
  const apiKey = document.getElementById('apiKey').value.trim();
  const apiBaseUrl = document.getElementById('apiBaseUrl').value.trim();
  const audioFile = document.getElementById('audioFile').files[0];
  const transcriptionDiv = document.getElementById('transcription');
  const transcribeButton = document.getElementById('transcribeButton');
  const copyButton = document.getElementById('copyButton');

  // The API key is deliberately never logged.
  if (!apiKey || !apiBaseUrl || !audioFile) {
    alert('Please provide the API key, the API base URL and a WAV file.');
    return;
  }

  // Accept a base URL typed without its trailing slash.
  const baseUrl = apiBaseUrl.endsWith('/') ? apiBaseUrl : `${apiBaseUrl}/`;
  const endpoint = `${baseUrl}audio/transcriptions`;

  transcribeButton.disabled = true;
  transcribeButton.textContent = 'Transcribing...';
  // Hide a copy button left over from an earlier run until this run succeeds.
  copyButton.style.display = 'none';
  // textContent (not innerHTML): everything shown here is plain text, and the
  // transcription/error text comes from a remote server.
  transcriptionDiv.textContent = 'Transcribing... Please wait.';

  try {
    const chunks = await splitAudioFile(audioFile);
    const pieces = [];

    // Chunks are uploaded one at a time on purpose so the text stays in order and
    // partial results can be displayed progressively.
    for (let i = 0; i < chunks.length; i++) {
      const formData = new FormData();
      formData.append('file', chunks[i], `chunk_${i + 1}.wav`);
      formData.append('model', 'whisper');

      const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${apiKey}`,
        },
        body: formData,
      });

      if (!response.ok) {
        let detail = response.statusText || `HTTP ${response.status}`;
        try {
          const errorData = await response.json();
          detail = errorData?.error?.message || detail;
        } catch {
          // The error body was not JSON; keep the HTTP status description.
        }
        throw new Error(`API error: ${detail}`);
      }

      const data = await response.json();
      const piece = typeof data?.text === 'string' ? data.text.trim() : '';
      if (piece) {
        pieces.push(piece);
      }
      transcriptionDiv.textContent = pieces.join(' ');
    }

    // Also clears the "please wait" message when there were no chunks at all.
    transcriptionDiv.textContent = pieces.join(' ');
    copyButton.style.display = 'block'; // Show copy button
  } catch (error) {
    console.error('Error during transcription:', error);
    transcriptionDiv.textContent = `Error: ${error.message}`;
  } finally {
    transcribeButton.disabled = false;
    transcribeButton.textContent = 'Transcribe';
  }
}
// Audio Chunking Functions (from original code)
/**
 * Decodes an audio file and re-encodes it as a sequence of WAV files, each limited to
 * a fixed byte budget.
 *
 * The file is decoded with a temporary `AudioContext`, cut into consecutive runs of
 * frames, and every run is encoded with `bufferToWav` and wrapped in a `File` named
 * `chunk_<n>.wav` (1-based). The frame count per chunk is derived from the size the
 * encoder actually produces (44-byte header + 2 bytes per sample per channel), so the
 * budget holds for any channel count.
 *
 * @param {Blob} file - The audio file to split (a `File` from an `<input type="file">`).
 * @returns {Promise<File[]>} The WAV chunks in playback order; empty when the decoded
 *   audio contains no frames.
 * @throws Rejects with the underlying error when the file cannot be read or decoded.
 */
async function splitAudioFile(file) {
  // 24 MiB per chunk. NOTE(review): presumably chosen to stay under a 25 MB upload
  // cap on the transcription endpoint — confirm against the API in use.
  const MAX_CHUNK_BYTES = 24 * 1024 * 1024;
  const WAV_HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2; // bufferToWav writes 16-bit PCM

  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  const audioContext = new AudioContextClass();

  try {
    const encodedAudio = await file.arrayBuffer();
    const audioBuffer = await audioContext.decodeAudioData(encodedAudio);
    const { numberOfChannels, sampleRate, length: totalFrames } = audioBuffer;

    const bytesPerFrame = numberOfChannels * BYTES_PER_SAMPLE;
    const framesPerChunk = Math.max(
      1,
      Math.floor((MAX_CHUNK_BYTES - WAV_HEADER_BYTES) / bytesPerFrame),
    );

    const chunks = [];
    for (let startFrame = 0; startFrame < totalFrames; startFrame += framesPerChunk) {
      const frameCount = Math.min(framesPerChunk, totalFrames - startFrame);
      const chunkBuffer = audioContext.createBuffer(numberOfChannels, frameCount, sampleRate);

      for (let channel = 0; channel < numberOfChannels; channel++) {
        // Copies `frameCount` frames (the destination's length) starting at startFrame.
        audioBuffer.copyFromChannel(chunkBuffer.getChannelData(channel), channel, startFrame);
      }

      const wavBlob = bufferToWav(chunkBuffer);
      chunks.push(new File([wavBlob], `chunk_${chunks.length + 1}.wav`, { type: 'audio/wav' }));
    }
    return chunks;
  } finally {
    // Always release the context so repeated transcriptions do not pile up open
    // AudioContexts. A failing close must not replace the real result or error.
    try {
      await audioContext.close();
    } catch (closeError) {
      console.warn('Failed to close AudioContext:', closeError);
    }
  }
}
/**
 * Encodes an audio buffer as a 16-bit PCM WAV file.
 *
 * Writes a 44-byte RIFF/WAVE header followed by the samples, interleaved frame by
 * frame across channels and stored little-endian. Float samples are clamped to
 * [-1, 1] before being scaled to the signed 16-bit range.
 *
 * The byte-rate header field is computed as `sampleRate * channels * 2`, so it is
 * correct for any channel count (it used to be hard-coded for stereo).
 *
 * @param {AudioBuffer} buffer - Source audio; only `length`, `numberOfChannels`,
 *   `sampleRate` and `getChannelData()` are used.
 * @returns {Blob} The encoded file with MIME type `audio/wav`.
 */
function bufferToWav(buffer) {
  const HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2;

  const { numberOfChannels, sampleRate, length: frameCount } = buffer;
  const blockAlign = numberOfChannels * BYTES_PER_SAMPLE; // bytes per frame
  const dataBytes = frameCount * blockAlign;

  const wavBuffer = new ArrayBuffer(HEADER_BYTES + dataBytes);
  const view = new DataView(wavBuffer);

  writeString(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataBytes, true); // size of everything after this field
  writeString(view, 8, 'WAVE');
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true); // fmt chunk size
  view.setUint16(20, 1, true); // audio format 1 = PCM
  view.setUint16(22, numberOfChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true); // byte rate
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, BYTES_PER_SAMPLE * 8, true); // bits per sample
  writeString(view, 36, 'data');
  view.setUint32(40, dataBytes, true);

  // Fetch each channel once, then write samples straight into the output instead of
  // building a full-size interleaved Float32Array first.
  const channels = Array.from({ length: numberOfChannels }, (_, channel) =>
    buffer.getChannelData(channel),
  );
  let offset = HEADER_BYTES;
  for (let frame = 0; frame < frameCount; frame++) {
    for (const channelData of channels) {
      const sample = Math.max(-1, Math.min(1, channelData[frame]));
      // Negative and positive halves scale differently because int16 is asymmetric.
      view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7fff, true);
      offset += BYTES_PER_SAMPLE;
    }
  }

  return new Blob([wavBuffer], { type: 'audio/wav' });
}
/**
 * Writes a string into a DataView, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte position of the first character.
 * @param {string} string - Text to write; intended for ASCII tags such as 'RIFF'.
 */
function writeString(view, offset, string) {
  let position = offset;
  for (const unit of string.split('')) {
    view.setUint8(position, unit.charCodeAt(0));
    position += 1;
  }
}
/**
 * Copies the text currently shown in `#transcription` to the system clipboard.
 *
 * Shows an alert on success. When the copy fails, or when the asynchronous Clipboard
 * API is not exposed on `navigator` (browsers only provide it in secure contexts),
 * the error is logged and the user is told the copy did not happen instead of the
 * click silently doing nothing.
 *
 * @returns {Promise<void>} Settles once the user has been notified; never rejects.
 */
async function copyToClipboard() {
  const transcriptionText = document.getElementById('transcription').innerText;
  try {
    if (typeof navigator.clipboard?.writeText !== 'function') {
      throw new Error('Clipboard API is not available in this context.');
    }
    await navigator.clipboard.writeText(transcriptionText);
    alert('Transcription copied to clipboard!');
  } catch (err) {
    console.error('Failed to copy transcription: ', err);
    alert('Could not copy the transcription to the clipboard.');
  }
}
</script>
</body>
</html>