Spaces:

umarmajeedofficial
/

FillUp

Sleeping

App Files Files Community

FillUp / app.py

umarmajeedofficial

Create app.py

f255904 verified about 2 months ago

raw

history blame

6.39 kB

	import os
	import warnings
	import torch
	import soundfile as sf
	from scipy.signal import resample
	from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
	import pdfplumber
	from reportlab.lib.pagesizes import letter
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	import streamlit as st
	import io
	import numpy as np

	# Suppress warnings globally
	warnings.filterwarnings("ignore")

	# Setup models
	device = "cuda:0" if torch.cuda.is_available() else "cpu"
	whisper_model_id = "openai/whisper-medium"


	@st.cache_resource
	def _load_whisper(model_id, target_device):
	    """Load the Whisper model, its processor and the ASR pipeline once.

	    Streamlit re-executes the whole script on every widget interaction, so
	    loading the checkpoint at module level would repeat the load on each
	    rerun. Caching the objects as a resource keeps a single copy alive.

	    Args:
	        model_id: Hugging Face identifier of the Whisper checkpoint.
	        target_device: Device string passed to the pipeline.

	    Returns:
	        A ``(model, processor, pipeline)`` tuple.
	    """
	    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
	    processor = AutoProcessor.from_pretrained(model_id)
	    asr_pipeline = pipeline(
	        "automatic-speech-recognition",
	        model=model,
	        tokenizer=processor.tokenizer,
	        feature_extractor=processor.feature_extractor,
	        # Split long recordings into 30-second chunks so audio longer than
	        # one Whisper window is transcribed in full.
	        chunk_length_s=30,
	        device=target_device,
	    )
	    return model, processor, asr_pipeline


	# Load Whisper model, processor and pipeline (cached across reruns).
	# Failures are reported in the UI, mirroring the FLAN-T5 loading below.
	try:
	    whisper_model, whisper_processor, whisper_pipe = _load_whisper(whisper_model_id, device)
	except Exception as e:
	    st.error(f"An error occurred while loading models: {e}")
	    st.stop()

	# Setup FLAN-T5 model and tokenizer
	flan_t5_model_id = "google/flan-t5-large"


	@st.cache_resource
	def _load_flan_t5(model_id):
	    """Load the FLAN-T5 tokenizer and model once.

	    Streamlit re-executes the script on every interaction; caching the
	    loaded objects as a resource avoids reloading the weights each time.

	    Args:
	        model_id: Hugging Face identifier of the FLAN-T5 checkpoint.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    tokenizer = T5Tokenizer.from_pretrained(model_id)
	    model = T5ForConditionalGeneration.from_pretrained(model_id)
	    return tokenizer, model


	try:
	    flan_t5_tokenizer, flan_t5_model = _load_flan_t5(flan_t5_model_id)
	except ImportError as e:
	    # Reported separately so a missing optional dependency is easy to spot.
	    st.error(f"ImportError: {e}")
	    st.stop()
	except Exception as e:
	    st.error(f"An error occurred while loading models: {e}")
	    st.stop()

	# Function to resample audio to 16000 Hz
	def resample_audio(audio_data, original_sample_rate, target_sample_rate=16000):
	    """Resample ``audio_data`` from one sample rate to another.

	    The output length is the input length scaled by
	    ``target_sample_rate / original_sample_rate`` and truncated to an int;
	    the resampling itself is delegated to ``scipy.signal.resample``.

	    Args:
	        audio_data: Sequence or array of samples (first axis is time).
	        original_sample_rate: Sample rate of ``audio_data`` in Hz.
	        target_sample_rate: Desired sample rate in Hz (default 16000).

	    Returns:
	        The resampled audio as returned by ``scipy.signal.resample``.
	    """
	    source_length = len(audio_data)
	    target_length = int(source_length * float(target_sample_rate) / original_sample_rate)
	    return resample(audio_data, target_length)

	# Function to transcribe audio files
	def transcribe_audio(audio_file):
	    """Transcribe one audio file with the Whisper pipeline.

	    The audio is decoded with ``soundfile``, down-mixed to mono, resampled
	    to 16 kHz when needed, and handed to ``whisper_pipe`` as raw samples.

	    Args:
	        audio_file: Path or file-like object readable by ``soundfile.read``.

	    Returns:
	        The transcribed text, or the string ``"Error during transcription"``
	        if any step fails (the error is also shown via ``st.error``).
	    """
	    try:
	        # Read the audio file
	        audio_data, sample_rate = sf.read(audio_file, dtype="float32")

	        # The ASR pipeline only accepts single-channel audio; multichannel
	        # files come back as (frames, channels), so average the channels.
	        if audio_data.ndim > 1:
	            audio_data = audio_data.mean(axis=1)

	        # Resample if necessary
	        if sample_rate != 16000:
	            audio_data = resample_audio(audio_data, sample_rate, 16000)

	        # The pipeline runs the feature extractor itself, so it must receive
	        # raw samples (not pre-computed processor features).
	        result = whisper_pipe({
	            "raw": np.asarray(audio_data, dtype=np.float32),
	            "sampling_rate": 16000,
	        })
	        return result['text']
	    except Exception as e:
	        st.error(f"Error in audio transcription: {e}")
	        return "Error during transcription"

	# Function to extract text and questions from PDF
	def extract_text_from_pdf(pdf_file):
	    """Extract the full text and the numbered question lines from a PDF.

	    A line counts as a question when, after stripping whitespace, its first
	    character is a digit.

	    Args:
	        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

	    Returns:
	        A ``(text, questions)`` tuple: ``text`` is the text of all pages
	        joined with newlines, ``questions`` is the list of stripped question
	        lines in document order. If extraction fails part-way, the error is
	        shown via ``st.error`` and whatever was collected so far is returned.
	    """
	    page_texts = []
	    questions = []
	    try:
	        with pdfplumber.open(pdf_file) as pdf:
	            for page in pdf.pages:
	                page_text = page.extract_text()
	                if not page_text:
	                    continue
	                page_texts.append(page_text)
	                for line in page_text.split("\n"):
	                    stripped = line.strip()
	                    if stripped and stripped[0].isdigit():
	                        questions.append(stripped)
	    except Exception as e:
	        st.error(f"Error extracting text from PDF: {e}")
	    # Join with a newline so the last line of one page is not fused with
	    # the first line of the next.
	    return "\n".join(page_texts), questions

	# Prompt templates; filled with str.format, which leaves braces inside the
	# substituted transcript/question untouched.
	_ANSWER_PROMPT = (
	    "The following text is a transcript from an audio recording. Read the text "
	    "and answer the following question in a complete sentence."
	    "\n\nText: {text}\n\nQuestion: {question}\n\nAnswer:"
	)
	_DETAIL_PROMPT = (
	    "Based on the following transcript, provide a more detailed answer to the question."
	    "\n\nText: {text}\n\nQuestion: {question}\n\nAnswer:"
	)
	# Slack for tokens gained when the pieces are tokenized together.
	_TOKEN_MARGIN = 8


	def _fit_transcript(text, text_ids, template, question, max_input_tokens):
	    """Return ``text``, shortened if needed so the filled prompt fits the limit."""
	    overhead = len(flan_t5_tokenizer(template.format(text="", question=question)).input_ids)
	    budget = max(max_input_tokens - overhead - _TOKEN_MARGIN, 0)
	    if len(text_ids) <= budget:
	        return text
	    return flan_t5_tokenizer.decode(text_ids[:budget], skip_special_tokens=True)


	def _flan_t5_answer(prompt, max_input_tokens, max_output_tokens):
	    """Run one FLAN-T5 generation for ``prompt`` and return the decoded text."""
	    inputs = flan_t5_tokenizer(
	        prompt, return_tensors='pt', max_length=max_input_tokens, truncation=True
	    ).to(flan_t5_model.device)
	    with torch.no_grad():
	        outputs = flan_t5_model.generate(**inputs, max_length=max_output_tokens)
	    return flan_t5_tokenizer.decode(outputs[0], skip_special_tokens=True)


	# Function to generate form data with FLAN-T5
	def generate_form_data(text, questions, max_input_tokens=1024, max_output_tokens=100):
	    """Answer each question from the transcript using FLAN-T5.

	    The tokenizer truncates the end of an over-long prompt, and the question
	    comes after the transcript, so a long transcript would cut the question
	    off. The transcript is therefore shortened first so that the question
	    and the "Answer:" cue always fit.

	    An answer shorter than 10 characters triggers one retry with a prompt
	    asking for more detail. An empty answer (first try or retry) is replaced
	    by a fixed "not present" message.

	    Args:
	        text: Transcript the answers are drawn from.
	        questions: Iterable of question strings.
	        max_input_tokens: Token limit for each prompt (default 1024).
	        max_output_tokens: ``max_length`` passed to ``generate`` (default 100).

	    Returns:
	        A string of ``"Question: ...\\nAnswer: ..."`` entries separated by blank
	        lines; the empty string when there are no questions.
	    """
	    if not questions:
	        return ""

	    # Tokenize the transcript once; it is the same for every question.
	    text_ids = flan_t5_tokenizer(text, add_special_tokens=False).input_ids

	    responses = []
	    for question in questions:
	        fitted = _fit_transcript(text, text_ids, _ANSWER_PROMPT, question, max_input_tokens)
	        generated_text = _flan_t5_answer(
	            _ANSWER_PROMPT.format(text=fitted, question=question),
	            max_input_tokens,
	            max_output_tokens,
	        ).strip()

	        if generated_text and len(generated_text) < 10:
	            # Very short answer: ask once more for a fuller one.
	            fitted = _fit_transcript(text, text_ids, _DETAIL_PROMPT, question, max_input_tokens)
	            generated_text = _flan_t5_answer(
	                _DETAIL_PROMPT.format(text=fitted, question=question),
	                max_input_tokens,
	                max_output_tokens,
	            ).strip()

	        if not generated_text:
	            generated_text = "The answer to this question is not present in the script."

	        responses.append(f"Question: {question}\nAnswer: {generated_text}")

	    return "\n\n".join(responses)

	# Function to save responses to PDF
	def save_responses_to_pdf(responses, output_pdf_path):
	    """Write the responses to a PDF, one "File N" section per response.

	    Args:
	        responses: Iterable of response strings; newlines become line breaks.
	        output_pdf_path: Destination passed to ``SimpleDocTemplate``.
	    """
	    # Local import keeps this function self-contained.
	    from xml.sax.saxutils import escape

	    document = SimpleDocTemplate(output_pdf_path, pagesize=letter)
	    styles = getSampleStyleSheet()

	    response_style = ParagraphStyle(
	        name='ResponseStyle',
	        parent=styles['BodyText'],
	        fontSize=10,
	        spaceAfter=12,
	    )

	    content = []
	    for index, response in enumerate(responses, start=1):
	        heading = Paragraph(f"<b>File {index}:</b>", styles['Heading2'])
	        # Paragraph parses its text as XML-like markup, so escape &, < and >
	        # from the (model/PDF-derived) text before adding our own <br/> tags.
	        safe_text = escape(response).replace("\n", "<br/>")
	        response_text = Paragraph(safe_text, response_style)

	        content.extend([heading, Spacer(1, 6), response_text, Spacer(1, 18)])

	    document.build(content)

	# Streamlit UI
	st.title("FillUp by Umar Majeed")

	# Upload audio files
	audio_files = st.file_uploader("Upload multiple audio files", type=["wav", "mp3"], accept_multiple_files=True)

	# Upload PDF file
	pdf_file = st.file_uploader("Upload a PDF file", type="pdf")

	if st.button("Process"):
	    if not (audio_files and pdf_file):
	        st.error("Please upload both audio files and a PDF file.")
	        st.stop()

	    _pdf_text, pdf_questions = extract_text_from_pdf(pdf_file)
	    if not pdf_questions:
	        # Without questions every response would be empty.
	        st.error("No numbered questions were found in the PDF.")
	        st.stop()

	    responses = []
	    for index, audio_file in enumerate(audio_files, start=1):
	        transcribed_text = transcribe_audio(audio_file)
	        if transcribed_text == "Error during transcription":
	            # transcribe_audio returns this sentinel on failure; do not feed
	            # it to the language model as if it were a transcript.
	            form_data = "Transcription failed for this file."
	        else:
	            form_data = generate_form_data(transcribed_text, pdf_questions)
	        responses.append(form_data)
	        st.write(f"File {index}:\n{form_data}\n")

	    # Build the PDF in memory: a fixed path under /tmp would be shared (and
	    # overwritten) by every concurrent session of the app.
	    pdf_buffer = io.BytesIO()
	    save_responses_to_pdf(responses, pdf_buffer)
	    st.write("Responses have been generated. You can download the result below.")

	    st.download_button(
	        label="Download PDF",
	        data=pdf_buffer.getvalue(),
	        file_name="response_output.pdf",
	        mime="application/pdf",
	    )