umarmajeedofficial committed on
Commit
f255904
1 Parent(s): 763957c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -0
app.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import warnings
3
+ import torch
4
+ import soundfile as sf
5
+ from scipy.signal import resample
6
+ from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
7
+ import pdfplumber
8
+ from reportlab.lib.pagesizes import letter
9
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
10
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
11
+ import streamlit as st
12
+ import io
13
+ import numpy as np
14
+
15
# Suppress warnings globally so library notices do not clutter the app output.
warnings.filterwarnings("ignore")

# Run the Whisper pipeline on the first GPU when one is available, else on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
whisper_model_id = "openai/whisper-medium"
flan_t5_model_id = "google/flan-t5-large"


# Streamlit re-executes this script on every user interaction. Caching the
# loaders keeps the models in memory across reruns instead of reloading them
# each time a widget changes.
@st.cache_resource
def _load_whisper(model_id, target_device):
    """Load the Whisper model, its processor and an ASR pipeline built on them."""
    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
    processor = AutoProcessor.from_pretrained(model_id)
    asr_pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device=target_device,
    )
    return model, processor, asr_pipe


@st.cache_resource
def _load_flan_t5(model_id):
    """Load the FLAN-T5 tokenizer and model."""
    tokenizer = T5Tokenizer.from_pretrained(model_id)
    model = T5ForConditionalGeneration.from_pretrained(model_id)
    return tokenizer, model


# Both model families are loaded under the same guard so that any load
# failure is reported in the UI and stops the script.
try:
    whisper_model, whisper_processor, whisper_pipe = _load_whisper(whisper_model_id, device)
    flan_t5_tokenizer, flan_t5_model = _load_flan_t5(flan_t5_model_id)
except ImportError as e:
    st.error(f"ImportError: {e}")
    st.stop()
except Exception as e:
    st.error(f"An error occurred while loading models: {e}")
    st.stop()
47
+
48
+ # Function to resample audio to 16000 Hz
49
def resample_audio(audio_data, original_sample_rate, target_sample_rate=16000):
    """Resample ``audio_data`` from ``original_sample_rate`` to ``target_sample_rate``.

    Args:
        audio_data: Sequence or array of samples. Its length (``len``) is taken
            as the number of input samples.
        original_sample_rate: Sample rate of ``audio_data`` in Hz; must be positive.
        target_sample_rate: Desired sample rate in Hz; must be positive.

    Returns:
        The resampled audio. The input is returned unchanged when it is empty
        or when both rates are already equal.

    Raises:
        ValueError: If either sample rate is not positive.
    """
    if original_sample_rate <= 0 or target_sample_rate <= 0:
        raise ValueError(
            f"Sample rates must be positive, got {original_sample_rate} and {target_sample_rate}."
        )

    # Nothing to do: avoid a needless pass over the signal, and avoid
    # handing an empty signal to the resampler.
    if original_sample_rate == target_sample_rate or len(audio_data) == 0:
        return audio_data

    # Output length scales with the ratio of the two rates (truncated to int).
    num_samples = int(len(audio_data) * float(target_sample_rate) / original_sample_rate)
    return resample(audio_data, num_samples)
53
+
54
+ # Function to transcribe audio files
55
def transcribe_audio(audio_file):
    """Transcribe one audio file to text with the Whisper pipeline.

    Args:
        audio_file: Path or file-like object readable by ``soundfile.read``.

    Returns:
        The transcribed text, or the string ``"Error during transcription"``
        if any step fails (the error is also shown in the Streamlit UI).
    """
    try:
        # Read the audio file as float32 samples.
        audio_data, sample_rate = sf.read(audio_file, dtype="float32")

        # Multi-channel files come back as a 2-D array; average the channels
        # because the ASR pipeline expects single-channel audio.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        # Resample if necessary
        if sample_rate != 16000:
            audio_data = resample_audio(audio_data, sample_rate, 16000)

        # The pipeline runs its own feature extraction, so it must receive the
        # raw waveform rather than features already computed by the processor.
        # chunk_length_s lets recordings longer than one Whisper window be
        # transcribed in pieces.
        result = whisper_pipe(
            {"raw": np.asarray(audio_data, dtype=np.float32), "sampling_rate": 16000},
            chunk_length_s=30,
        )
        return result['text']
    except Exception as e:
        st.error(f"Error in audio transcription: {e}")
        return "Error during transcription"
71
+
72
+ # Function to extract text and questions from PDF
73
def extract_text_from_pdf(pdf_file):
    """Extract the text of a PDF and the lines in it that look like questions.

    A line is treated as a question when, after stripping surrounding
    whitespace, its first character is a digit (e.g. ``"1. What is ...?"``).

    Args:
        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        A ``(text, questions)`` tuple: the text of all pages joined with
        newlines, and the list of stripped question lines in page order.
        If extraction fails, the error is shown in the Streamlit UI and
        whatever was collected before the failure is returned.
    """
    page_texts = []
    questions = []
    try:
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if not page_text:
                    # Pages with no extractable text contribute nothing.
                    continue
                page_texts.append(page_text)
                for line in page_text.split("\n"):
                    stripped = line.strip()
                    # Slicing keeps the check safe for empty lines.
                    if stripped[:1].isdigit():
                        questions.append(stripped)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    # Join pages with a newline so the last line of one page does not fuse
    # with the first line of the next.
    return "\n".join(page_texts), questions
89
+
90
+ # Function to generate form data with FLAN-T5
91
def _ask_flan_t5(instruction, text, text_ids, question, max_input_tokens=1024):
    """Build one prompt, run FLAN-T5 on it and return the decoded answer.

    The transcript is shortened when needed so that the question and the
    trailing "Answer:" cue, which sit at the end of the prompt, are not the
    part removed by truncation.
    """
    head = f"{instruction}\n\nText: "
    tail = f"\n\nQuestion: {question}\n\nAnswer:"

    # Tokens left for the transcript once the fixed parts of the prompt are
    # accounted for. The count is approximate because tokenizing the pieces
    # separately may differ slightly from tokenizing the final prompt.
    budget = max_input_tokens - len(flan_t5_tokenizer(head + tail).input_ids)
    if len(text_ids) > budget:
        text = flan_t5_tokenizer.decode(text_ids[:max(budget, 0)], skip_special_tokens=True)

    # truncation=True remains as a safety net for the approximation above.
    inputs = flan_t5_tokenizer(
        head + text + tail,
        return_tensors='pt',
        max_length=max_input_tokens,
        truncation=True,
    ).to(flan_t5_model.device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = flan_t5_model.generate(**inputs, max_length=100)
    return flan_t5_tokenizer.decode(outputs[0], skip_special_tokens=True)


def generate_form_data(text, questions):
    """Answer each question from the transcript using FLAN-T5.

    Args:
        text: Transcript the answers should be based on.
        questions: Iterable of question strings.

    Returns:
        A single string with one ``"Question: ...\\nAnswer: ..."`` entry per
        question, entries separated by a blank line. Empty when there are no
        questions.
    """
    # The transcript is the same for every question, so tokenize it once.
    text_ids = flan_t5_tokenizer(text, add_special_tokens=False).input_ids

    responses = []
    for question in questions:
        generated_text = _ask_flan_t5(
            "The following text is a transcript from an audio recording. "
            "Read the text and answer the following question in a complete sentence.",
            text,
            text_ids,
            question,
        )

        if not generated_text.strip():
            generated_text = "The answer to this question is not present in the script."
        elif len(generated_text.strip()) < 10:
            # Very short answers get one retry with a prompt asking for detail.
            generated_text = _ask_flan_t5(
                "Based on the following transcript, provide a more detailed answer to the question.",
                text,
                text_ids,
                question,
            )

        responses.append(f"Question: {question}\nAnswer: {generated_text.strip()}")

    return "\n\n".join(responses)
113
+
114
+ # Function to save responses to PDF
115
def save_responses_to_pdf(responses, output_pdf_path):
    """Write the generated responses to a PDF, one headed section per entry.

    Args:
        responses: Iterable of response strings; newlines inside a response
            become line breaks in the PDF.
        output_pdf_path: Destination handed to ``SimpleDocTemplate``.
    """
    from xml.sax.saxutils import escape

    document = SimpleDocTemplate(output_pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    response_style = ParagraphStyle(
        name='ResponseStyle',
        parent=styles['BodyText'],
        fontSize=10,
        spaceAfter=12
    )

    content = []
    for index, response in enumerate(responses, start=1):
        heading = Paragraph(f"<b>File {index}:</b>", styles['Heading2'])
        # Paragraph text is parsed as markup, so escape &, < and > coming from
        # the model output before inserting our own <br/> tags.
        safe_text = escape(response).replace("\n", "<br/>")
        response_text = Paragraph(safe_text, response_style)

        content.append(heading)
        content.append(Spacer(1, 6))
        content.append(response_text)
        content.append(Spacer(1, 18))

    document.build(content)
137
+
138
# Streamlit UI
st.title("FillUp by Umar Majeed")

# Upload audio files
audio_files = st.file_uploader("Upload multiple audio files", type=["wav", "mp3"], accept_multiple_files=True)

# Upload PDF file
pdf_file = st.file_uploader("Upload a PDF file", type="pdf")

if st.button("Process"):
    if audio_files and pdf_file:
        responses = []
        pdf_text, pdf_questions = extract_text_from_pdf(pdf_file)

        if not pdf_questions:
            # Without questions there is nothing to answer, so skip the
            # expensive transcription step entirely.
            st.warning("No numbered questions were found in the PDF.")
        else:
            for audio_file in audio_files:
                transcribed_text = transcribe_audio(audio_file)
                form_data = generate_form_data(transcribed_text, pdf_questions)
                responses.append(form_data)
                st.write(f"File {len(responses)}:\n{form_data}\n")

            # Build the PDF in memory: a fixed path on disk would be shared by
            # every concurrent session and assumes a POSIX /tmp directory.
            pdf_buffer = io.BytesIO()
            save_responses_to_pdf(responses, pdf_buffer)
            st.write("Responses have been generated. You can download the result below.")

            st.download_button(
                label="Download PDF",
                data=pdf_buffer.getvalue(),
                file_name="response_output.pdf",
                mime="application/pdf"
            )
    else:
        st.error("Please upload both audio files and a PDF file.")