Spaces:
Sleeping
Sleeping
File size: 8,005 Bytes
376049c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import io
import os
import requests
import pdfplumber
import torch
import ffmpeg
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import streamlit as st
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
# Suppress warnings
import warnings
warnings.filterwarnings("ignore")
# Define paths for temporary files
temp_audio_folder = "/tmp/audios/"
temp_pdf_path = "/tmp/uploaded_pdf.pdf"
temp_output_pdf_path = "/tmp/response_output.pdf"

# Ensure temporary directories exist
os.makedirs(temp_audio_folder, exist_ok=True)

# Setup models
device = "cuda:0" if torch.cuda.is_available() else "cpu"
whisper_model_id = "openai/whisper-medium"


@st.cache_resource
def _load_whisper(model_id, target_device):
    """Load the Whisper model, processor and ASR pipeline exactly once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the loaded objects as a shared resource avoids
    reloading the checkpoint on each rerun.

    Args:
        model_id: Hugging Face model identifier to load.
        target_device: Device string handed to the pipeline
            (``"cuda:0"`` or ``"cpu"`` as computed above).

    Returns:
        A ``(model, processor, asr_pipeline)`` tuple.
    """
    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
    processor = AutoProcessor.from_pretrained(model_id)
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device=target_device,
    )
    return model, processor, asr_pipeline


# Keep the original module-level names so the rest of the file is unaffected.
whisper_model, whisper_processor, whisper_pipe = _load_whisper(whisper_model_id, device)
# Granite model URL and headers
granite_url = "https://us-south.ml.cloud.ibm.com/ml/v1/text/generation?version=2023-05-29"

# The bearer token is a short-lived secret: pasting one into the source both
# leaks the credential and stops working as soon as it expires. It is read
# from the WATSONX_IAM_TOKEN environment variable instead. When the variable
# is unset the header carries an empty token, so the app still starts but
# requests will not authenticate until the variable is configured.
granite_headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer " + os.environ.get("WATSONX_IAM_TOKEN", ""),
}
# Function to transcribe audio files
def transcribe_audio(file_path: str, chunk_length_s: float = 30) -> str:
    """Transcribe an audio file to text with the module-level Whisper pipeline.

    Args:
        file_path: Path of the audio file to transcribe.
        chunk_length_s: Chunk length, in seconds, forwarded to the pipeline.
            Whisper processes 30-second windows; enabling chunking lets
            longer recordings be transcribed in full instead of being
            truncated or rejected.

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    result = whisper_pipe(file_path, chunk_length_s=chunk_length_s)
    return result['text']
# Function to extract text and questions from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text and the list of question lines from a PDF.

    Every non-blank line of every page is treated as one form question.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A ``(text, questions)`` tuple. ``text`` is the text of all pages that
        yielded any, joined by newlines. ``questions`` is the list of
        stripped, non-empty lines in page order.
    """
    page_texts = []
    questions = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # Skip pages for which no text was extracted.
            if not page_text:
                continue
            page_texts.append(page_text)
            questions.extend(
                line.strip() for line in page_text.split("\n") if line.strip()
            )
    # Join with a newline so the last line of one page is not fused with the
    # first line of the next page.
    return "\n".join(page_texts), questions
# Function to generate form data with Granite
def generate_form_data(text, questions, timeout=120):
    """Ask the Granite text-generation endpoint to fill in the form.

    Builds a prompt from the transcript and the form questions, posts it to
    ``granite_url`` with ``granite_headers`` and returns the generated text.

    Args:
        text: Transcript to extract the answers from.
        questions: Iterable of question strings; each is rendered as a
            ``- question`` bullet in the prompt.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the app
            indefinitely.

    Returns:
        The first result's ``generated_text`` with surrounding whitespace
        stripped.

    Raises:
        Exception: If the service answers with a status other than 200.
        requests.exceptions.Timeout: If the request exceeds ``timeout``.
    """
    question_list = "\n".join(f"- {question}" for question in questions)
    prompt = (
        "The following text is a transcript from an audio recording. "
        "Read the text and extract the information needed to fill out the following form."
        f"\n\nText: {text}\n\nForm Questions:\n{question_list}\n\nExtracted Form Data:"
    )
    body = {
        "input": prompt,
        "parameters": {
            "decoding_method": "sample",
            "max_new_tokens": 900,
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 1,
            "repetition_penalty": 1.05,
        },
        "model_id": "ibm/granite-13b-chat-v2",
        "project_id": "698f0da7-6b34-4642-8540-978e70e85c8e",  # Replace with your actual project ID
        "moderations": {
            "hap": {
                "input": {
                    "enabled": True,
                    "threshold": 0.5,
                    "mask": {"remove_entity_value": True},
                },
                "output": {
                    "enabled": True,
                    "threshold": 0.5,
                    "mask": {"remove_entity_value": True},
                },
            }
        },
    }
    response = requests.post(
        granite_url, headers=granite_headers, json=body, timeout=timeout
    )
    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))
    data = response.json()
    return data['results'][0]['generated_text'].strip()
# Function to save responses to PDF
def save_responses_to_pdf(responses, output_pdf_path):
    """Write the generated responses to a PDF, one headed section per file.

    Args:
        responses: Iterable of response strings; section *n* is headed
            ``File n:``.
        output_pdf_path: Destination path of the PDF to create.
    """
    # Imported here so the function carries its own dependency.
    from xml.sax.saxutils import escape

    document = SimpleDocTemplate(output_pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    # Custom style for numbered responses
    number_style = ParagraphStyle(
        name='NumberedStyle',
        parent=styles['BodyText'],
        fontSize=10,
        spaceAfter=12,
    )

    content = []
    for index, response in enumerate(responses, start=1):
        heading = Paragraph(f"<b>File {index}:</b>", styles['Heading2'])
        # Paragraph interprets its text as XML-style markup, so free-form
        # model output containing &, < or > must be escaped first; only then
        # are newlines turned into explicit <br/> line breaks.
        body_markup = escape(response).replace("\n", "<br/>")
        response_text = Paragraph(body_markup, number_style)
        content.append(heading)
        content.append(Spacer(1, 6))  # Space between heading and response
        content.append(response_text)
        content.append(Spacer(1, 18))  # Space between responses

    document.build(content)
# Set up the Streamlit app
st.title("FILL IT")

# Upload multiple audio files
uploaded_audios = st.file_uploader("Upload audio files", type=["wav", "mp3"], accept_multiple_files=True)

# Upload PDF file
uploaded_pdf = st.file_uploader("Upload a PDF file with questions", type=["pdf"])

# Output box to display responses
output_box = st.empty()

# Button to start processing
if st.button("Start Processing"):
    if uploaded_audios and uploaded_pdf:
        responses = []

        # Persist the uploaded PDF so it can be opened by path.
        pdf_bytes = uploaded_pdf.read()
        with open(temp_pdf_path, "wb") as f:
            f.write(pdf_bytes)

        # The same PDF is used for every audio file, so parse it once
        # instead of once per loop iteration.
        _, questions = extract_text_from_pdf(temp_pdf_path)

        # Process each uploaded audio file
        for audio_file in uploaded_audios:
            audio_bytes = audio_file.read()
            # Use only the base name so a client-supplied file name cannot
            # point outside the temporary audio folder.
            audio_path = os.path.join(temp_audio_folder, os.path.basename(audio_file.name))
            with open(audio_path, "wb") as f:
                f.write(audio_bytes)

            # Transcribe audio
            transcription = transcribe_audio(audio_path)

            # Generate form data with Granite
            form_data = generate_form_data(transcription, questions)
            responses.append(form_data)

        # st.empty() holds a single element, so successive writes to it would
        # replace one another and leave only the last response visible.
        # Rendering inside a container shows every response.
        with output_box.container():
            st.write("Processing completed. Here are the results:")
            for index, response in enumerate(responses, start=1):
                st.write(f"File {index}:\n{response}\n")

        # Save responses to PDF
        save_responses_to_pdf(responses, temp_output_pdf_path)

        # Button to download the PDF with responses
        with open(temp_output_pdf_path, "rb") as f:
            st.download_button(
                label="Download Responses as PDF",
                data=f,
                file_name="response_output.pdf",
                mime="application/pdf"
            )
    else:
        st.warning("Please upload both audio files and a PDF file.")