<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Whisper WAV Transcription</title>
    <style>
        body {
            font-family: sans-serif;
            display: flex;
            flex-direction: column;
            align-items: center;
            padding: 20px;
        }
        #container {
            width: 80%;
            border: 1px solid #ccc;
            padding: 20px;
            border-radius: 5px;
        }
        input[type="text"], input[type="file"], button {
            padding: 10px;
            margin: 10px 0;
            width: calc(100% - 22px);
            box-sizing: border-box;
            border: 1px solid #ccc;
            border-radius: 3px;
        }
        button {
            background-color: #4CAF50;
            color: white;
            cursor: pointer;
        }
        button:disabled {
            background-color: #ccc;
            cursor: not-allowed;
        }
        #transcription {
            margin-top: 20px;
            padding: 15px;
            border: 1px solid #ccc;
            border-radius: 5px;
            white-space: pre-wrap;
            width: calc(100% - 32px); /* Adjust width for padding */
            box-sizing: border-box;
        }
    </style>
</head>
</head> | |
<body> | |
<div id="container"> | |
<h1>Whisper WAV Transcription</h1> | |
<h3>API Key</h3> | |
<input type="text" id="apiKey"> <!-- Hidden input for API key --> | |
<h3>API base url (with '.../v1/' at the end)</h3> | |
<input type="text" id="apiBaseUrl"> <!-- Hidden input for API base URL --> | |
<label for="audioFile">Select WAV File:</label> | |
<input type="file" id="audioFile" accept=".wav"> | |
<button id="transcribeButton" onclick="transcribeAudio()">Transcribe</button> | |
<div id="transcription"></div> | |
<button id="copyButton" onclick="copyToClipboard()" style="display: none;">Copy to Clipboard</button> | |
</div> | |
<script> | |
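        // Flow: read the API key and base URL from the inputs, split the selected
        // WAV file into chunks small enough for the API's upload limit, send each
        // chunk to the `audio/transcriptions` endpoint, and append the results.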
        // Extract key=value parameters from the URL hash (e.g. #apiKey=...&apiBaseUrl=...)
        function getHashParams() {
            const hash = window.location.hash.substring(1);
            const params = {};
            hash.split('&').forEach(part => {
                const [key, value] = part.split('=');
                if (key && value) {
                    params[key] = value;
                }
            });
            return params;
        }
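        // A minimal sketch (not wired up in the original script) that uses getHashParams()
        // to prefill the two text inputs from the URL hash; the apiKey/apiBaseUrl parameter
        // names are an assumption based on the input ids above.
        window.addEventListener('DOMContentLoaded', () => {
            const params = getHashParams();
            if (params.apiKey) document.getElementById('apiKey').value = decodeURIComponent(params.apiKey);
            if (params.apiBaseUrl) document.getElementById('apiBaseUrl').value = decodeURIComponent(params.apiBaseUrl);
        });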
        async function transcribeAudio() {
            const apiKey = document.getElementById('apiKey').value;
            const apiBaseUrl = document.getElementById('apiBaseUrl').value;
            const audioFile = document.getElementById('audioFile').files[0];
            const transcriptionDiv = document.getElementById('transcription');
            if (!apiKey || !apiBaseUrl || !audioFile) {
                alert('Please provide the API key, the API base URL, and a WAV file.');
                return;
            }
            const transcribeButton = document.getElementById('transcribeButton');
            transcribeButton.disabled = true;
            transcribeButton.textContent = 'Transcribing...';
            transcriptionDiv.textContent = 'Transcribing... Please wait.';
            try {
                const chunks = await splitAudioFile(audioFile);
                let fullTranscription = '';
                for (let i = 0; i < chunks.length; i++) {
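                    // Each chunk is uploaded as multipart/form-data. The 'model' value
                    // must match what the target server expects; OpenAI's hosted API,
                    // for example, uses 'whisper-1' rather than 'whisper'.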
                    const formData = new FormData();
                    formData.append('file', chunks[i], `chunk_${i + 1}.wav`);
                    formData.append('model', 'whisper');
                    const response = await fetch(`${apiBaseUrl}audio/transcriptions`, {
                        method: 'POST',
                        headers: {
                            'Authorization': `Bearer ${apiKey}`
                        },
                        body: formData
                    });
                    if (!response.ok) {
                        // The error body may not be JSON, so fall back to the status text.
                        let message = response.statusText;
                        try {
                            const errorData = await response.json();
                            message = errorData.error?.message || message;
                        } catch (_) {}
                        throw new Error(`API error: ${message}`);
                    }
                    const data = await response.json();
                    fullTranscription += (fullTranscription ? ' ' : '') + data.text.trim();
                    transcriptionDiv.textContent = fullTranscription; // Show progress after each chunk
                }
                transcriptionDiv.textContent = fullTranscription;
                document.getElementById('copyButton').style.display = 'block'; // Show copy button
            } catch (error) {
                console.error('Error during transcription:', error);
                transcriptionDiv.textContent = `Error: ${error.message}`;
            } finally {
                transcribeButton.disabled = false;
                transcribeButton.textContent = 'Transcribe';
            }
        }
        // Audio chunking helpers
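        // Strategy: decode the uploaded WAV with the Web Audio API, slice the decoded
        // samples into chunks that stay under the request size limit, and re-encode
        // each chunk as a standalone 16-bit PCM WAV file before uploading.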
        async function splitAudioFile(file) {
            const audioContext = new (window.AudioContext || window.webkitAudioContext)();
            const reader = new FileReader();
            return new Promise((resolve, reject) => {
                reader.onload = async function (event) {
                    try {
                        const audioBuffer = await audioContext.decodeAudioData(event.target.result);
                        const chunks = [];
                        const chunkSize = 24 * 1024 * 1024; // Target ~24 MB of PCM data per chunk, below the API's ~25 MB upload limit
                        const sampleRate = audioBuffer.sampleRate;
                        const bytesPerSample = 2; // Chunks are re-encoded as 16-bit PCM
                        const samplesPerChunk = Math.floor(chunkSize / (bytesPerSample * audioBuffer.numberOfChannels));
                        const chunksCount = Math.ceil(audioBuffer.length / samplesPerChunk);
                        for (let i = 0; i < chunksCount; i++) {
                            const startSample = i * samplesPerChunk;
                            const endSample = Math.min((i + 1) * samplesPerChunk, audioBuffer.length);
                            const chunkBuffer = audioContext.createBuffer(
                                audioBuffer.numberOfChannels,
                                endSample - startSample,
                                sampleRate
                            );
                            for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
                                const chunkChannelData = chunkBuffer.getChannelData(channel);
                                audioBuffer.copyFromChannel(chunkChannelData, channel, startSample);
                            }
                            const wavBlob = bufferToWav(chunkBuffer);
                            chunks.push(new File([wavBlob], `chunk_${i + 1}.wav`, { type: 'audio/wav' }));
                        }
                        resolve(chunks);
                    } catch (error) {
                        reject(error);
                    }
                };
                reader.onerror = reject;
                reader.readAsArrayBuffer(file);
            });
        }
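        // Encode an AudioBuffer as a 16-bit PCM WAV Blob: interleave the channel data,
        // write the 44-byte RIFF/WAVE header, then append the samples as little-endian
        // signed 16-bit integers.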
        function bufferToWav(buffer) {
            // Interleave the per-channel Float32 samples into a single frame-ordered array
            const interleaved = new Float32Array(buffer.length * buffer.numberOfChannels);
            for (let channel = 0; channel < buffer.numberOfChannels; channel++) {
                const channelData = buffer.getChannelData(channel);
                for (let i = 0; i < buffer.length; i++) {
                    interleaved[i * buffer.numberOfChannels + channel] = channelData[i];
                }
            }
            const wavBuffer = new ArrayBuffer(44 + interleaved.length * 2);
            const view = new DataView(wavBuffer);
            writeString(view, 0, 'RIFF');
            view.setUint32(4, 36 + interleaved.length * 2, true); // RIFF chunk size
            writeString(view, 8, 'WAVE');
            writeString(view, 12, 'fmt ');
            view.setUint32(16, 16, true); // fmt chunk size
            view.setUint16(20, 1, true); // Audio format: PCM
            view.setUint16(22, buffer.numberOfChannels, true);
            view.setUint32(24, buffer.sampleRate, true);
            view.setUint32(28, buffer.sampleRate * buffer.numberOfChannels * 2, true); // Byte rate
            view.setUint16(32, buffer.numberOfChannels * 2, true); // Block align
            view.setUint16(34, 16, true); // Bits per sample
            writeString(view, 36, 'data');
            view.setUint32(40, interleaved.length * 2, true); // data chunk size
            // Convert float samples in [-1, 1] to signed 16-bit PCM
            const floatTo16BitPCM = (output, offset, input) => {
                for (let i = 0; i < input.length; i++, offset += 2) {
                    const s = Math.max(-1, Math.min(1, input[i]));
                    output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                }
            };
            floatTo16BitPCM(view, 44, interleaved);
            return new Blob([wavBuffer], { type: 'audio/wav' });
        }
        function writeString(view, offset, string) {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        }
        function copyToClipboard() {
            const transcriptionText = document.getElementById('transcription').innerText;
            navigator.clipboard.writeText(transcriptionText)
                .then(() => {
                    alert('Transcription copied to clipboard!');
                })
                .catch(err => {
                    console.error('Failed to copy transcription: ', err);
                });
        }
    </script>
</body>
</html>