umarmajeedofficial committed
Commit 376049c
Parent: 120b558

Create app.py

Files changed (1)
app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
import io
import os
import requests
import pdfplumber
import torch
import ffmpeg
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import streamlit as st
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
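# Note: based on the imports above, this app is assumed to need roughly these
# packages (they are not pinned in this commit): streamlit, torch, transformers,
# pdfplumber, reportlab, requests, and ffmpeg-python, plus an ffmpeg binary on
# the system for audio decoding.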
# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Define paths for temporary files
temp_audio_folder = "/tmp/audios/"
temp_pdf_path = "/tmp/uploaded_pdf.pdf"
temp_output_pdf_path = "/tmp/response_output.pdf"

# Ensure temporary directories exist
os.makedirs(temp_audio_folder, exist_ok=True)

# Setup models
device = "cuda:0" if torch.cuda.is_available() else "cpu"
whisper_model_id = "openai/whisper-medium"

# Load Whisper model and processor
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(whisper_model_id)
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id)

# Create Whisper pipeline
whisper_pipe = pipeline(
    "automatic-speech-recognition",
    model=whisper_model,
    tokenizer=whisper_processor.tokenizer,
    feature_extractor=whisper_processor.feature_extractor,
    device=device
)
# Granite text generation endpoint (IBM watsonx.ai) and request headers.
# The IAM access token expires after roughly an hour, so it is read from an
# environment variable here instead of being hard-coded.
granite_url = "https://us-south.ml.cloud.ibm.com/ml/v1/text/generation?version=2023-05-29"
granite_headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.environ.get('IBM_IAM_TOKEN', '')}"  # Supply your own IAM access token
}
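# Optional sketch: instead of supplying IBM_IAM_TOKEN directly, an IBM Cloud API
# key can be exchanged for a short-lived IAM access token at startup. The
# IBMCLOUD_API_KEY variable name below is an assumption, not part of the original app.
def fetch_iam_token(api_key):
    """Exchange an IBM Cloud API key for an IAM access token."""
    resp = requests.post(
        "https://iam.cloud.ibm.com/identity/token",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={
            "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
            "apikey": api_key,
        },
    )
    resp.raise_for_status()
    return resp.json()["access_token"]

# Example usage (uncomment to enable):
# granite_headers["Authorization"] = f"Bearer {fetch_iam_token(os.environ['IBMCLOUD_API_KEY'])}"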
# Function to transcribe audio files
def transcribe_audio(file_path):
    result = whisper_pipe(file_path)
    return result['text']

# Function to extract text and questions from PDF
# (every non-empty line of the PDF is treated as a form question)
def extract_text_from_pdf(pdf_path):
    text = ""
    questions = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
                questions += [line.strip() for line in page_text.split("\n") if line.strip()]
    return text, questions
# Function to generate form data with Granite
def generate_form_data(text, questions):
    question_list = "\n".join(f"- {question}" for question in questions)
    body = {
        "input": f"""The following text is a transcript from an audio recording. Read the text and extract the information needed to fill out the following form.\n\nText: {text}\n\nForm Questions:\n{question_list}\n\nExtracted Form Data:""",
        "parameters": {
            "decoding_method": "sample",
            "max_new_tokens": 900,
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 1,
            "repetition_penalty": 1.05
        },
        "model_id": "ibm/granite-13b-chat-v2",
        "project_id": "698f0da7-6b34-4642-8540-978e70e85c8e",  # Replace with your actual project ID
        "moderations": {
            "hap": {
                "input": {
                    "enabled": True,
                    "threshold": 0.5,
                    "mask": {"remove_entity_value": True}
                },
                "output": {
                    "enabled": True,
                    "threshold": 0.5,
                    "mask": {"remove_entity_value": True}
                }
            }
        }
    }
    response = requests.post(granite_url, headers=granite_headers, json=body)
    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))
    data = response.json()
    return data['results'][0]['generated_text'].strip()
# Function to save responses to PDF
def save_responses_to_pdf(responses, output_pdf_path):
    document = SimpleDocTemplate(output_pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    # Custom style for numbered responses
    number_style = ParagraphStyle(
        name='NumberedStyle',
        parent=styles['BodyText'],
        fontSize=10,
        spaceAfter=12
    )

    content = []

    for index, response in enumerate(responses, start=1):
        # Add the response number and content
        heading = Paragraph(f"<b>File {index}:</b>", styles['Heading2'])
        response_text = Paragraph(response.replace("\n", "<br/>"), number_style)

        content.append(heading)
        content.append(Spacer(1, 6))   # Space between heading and response
        content.append(response_text)
        content.append(Spacer(1, 18))  # Space between responses

    document.build(content)
# Set up the Streamlit app
st.title("FILL IT")

# Upload multiple audio files
uploaded_audios = st.file_uploader("Upload audio files", type=["wav", "mp3"], accept_multiple_files=True)

# Upload PDF file
uploaded_pdf = st.file_uploader("Upload a PDF file with questions", type=["pdf"])

# Output box to display responses
output_box = st.empty()

# Button to start processing
if st.button("Start Processing"):
    if uploaded_audios and uploaded_pdf:
        responses = []

        # Save the uploaded PDF to a temporary file
        pdf_bytes = uploaded_pdf.read()
        with open(temp_pdf_path, "wb") as f:
            f.write(pdf_bytes)

        # Extract text and questions from the PDF once; they are the same for every audio file
        pdf_text, questions = extract_text_from_pdf(temp_pdf_path)

        # Process each uploaded audio file
        for audio_file in uploaded_audios:
            # Save the uploaded audio to a temporary file
            audio_bytes = audio_file.read()
            audio_path = os.path.join(temp_audio_folder, audio_file.name)
            with open(audio_path, "wb") as f:
                f.write(audio_bytes)

            # Transcribe audio
            transcription = transcribe_audio(audio_path)

            # Generate form data with Granite
            form_data = generate_form_data(transcription, questions)
            responses.append(form_data)

        # Display responses in the output box.
        # st.empty() holds a single element, so the results are combined into one
        # string and written once (separate writes would overwrite each other).
        results_text = "Processing completed. Here are the results:\n\n"
        for index, response in enumerate(responses, start=1):
            results_text += f"File {index}:\n{response}\n\n"
        output_box.write(results_text)

        # Save responses to PDF
        save_responses_to_pdf(responses, temp_output_pdf_path)

        # Button to download the PDF with responses
        with open(temp_output_pdf_path, "rb") as f:
            st.download_button(
                label="Download Responses as PDF",
                data=f,
                file_name="response_output.pdf",
                mime="application/pdf"
            )
    else:
        st.warning("Please upload both audio files and a PDF file.")
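# To run this app locally with the standard Streamlit CLI:
#   streamlit run app.py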