Spaces:
Sleeping
Sleeping
import io | |
import os | |
import requests | |
import pdfplumber | |
import torch | |
import ffmpeg | |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline | |
import streamlit as st | |
from reportlab.lib.pagesizes import letter | |
from reportlab.pdfgen import canvas | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer | |
# Suppress warnings | |
import warnings | |
warnings.filterwarnings("ignore") | |
# Define paths for temporary files | |
temp_audio_folder = "/tmp/audios/" | |
temp_pdf_path = "/tmp/uploaded_pdf.pdf" | |
temp_output_pdf_path = "/tmp/response_output.pdf" | |
# Ensure temporary directories exist | |
os.makedirs(temp_audio_folder, exist_ok=True) | |
# Setup models | |
device = "cuda:0" if torch.cuda.is_available() else "cpu" | |
whisper_model_id = "openai/whisper-medium" | |
# Load Whisper model and processor | |
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(whisper_model_id) | |
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id) | |
# Create Whisper pipeline | |
whisper_pipe = pipeline( | |
"automatic-speech-recognition", | |
model=whisper_model, | |
tokenizer=whisper_processor.tokenizer, | |
feature_extractor=whisper_processor.feature_extractor, | |
device=device | |
) | |
# Granite model URL and headers | |
granite_url = "https://us-south.ml.cloud.ibm.com/ml/v1/text/generation?version=2023-05-29" | |
granite_headers = { | |
"Accept": "application/json", | |
"Content-Type": "application/json", | |
"Authorization": "Bearer eyJraWQiOiIyMDI0MDgwMzA4NDEiLCJhbGciOiJSUzI1NiJ9.eyJpYW1faWQiOiJJQk1pZC02OTQwMDBJTlNIIiwiaWQiOiJJQk1pZC02OTQwMDBJTlNIIiwicmVhbG1pZCI6IklCTWlkIiwianRpIjoiNzIxMTJlNWUtOTRhNC00MTY1LTk2ZDgtMTAxYTg0YjhlNmQxIiwiaWRlbnRpZmllciI6IjY5NDAwMElOU0giLCJnaXZlbl9uYW1lIjoiVW1hciIsImZhbWlseV9uYW1lIjoiTWFqZWVkIiwibmFtZSI6IlVtYXIgTWFqZWVkIiwiZW1haWwiOiJ1bWFybWFqZWVkb2ZmaWNpYWxAZ21haWwuY29tIiwic3ViIjoidW1hcm1hamVlZG9mZmljaWFsQGdtYWlsLmNvbSIsImF1dGhuIjp7InN1YiI6InVtYXJtYWplZWRvZmZpY2lhbEBnbWFpbC5jb20iLCJpYW1faWQiOiJJQk1pZC02OTQwMDBJTlNIIiwibmFtZSI6IlVtYXIgTWFqZWVkIiwiZ2l2ZW5fbmFtZSI6IlVtYXIiLCJmYW1pbHlfbmFtZSI6Ik1hamVlZCIsImVtYWlsIjoidW1hcm1hamVlZG9mZmljaWFsQGdtYWlsLmNvbSJ9LCJhY2NvdW50Ijp7InZhbGlkIjp0cnVlLCJic3MiOiIyZTY5MjI1ZjNmMjc0Nzc2ODkwMGE2MGQ5MDBkM2UzNyIsImltc191c2VyX2lkIjoiMTI2MjI5MTciLCJmcm96ZW4iOnRydWUsImltcyI6IjI3NDQzNDQifSwiaWF0IjoxNzI0NjM3ODUyLCJleHAiOjE3MjQ2NDE0NTIsImlzcyI6Imh0dHBzOi8vaWFtLmNsb3VkLmlibS5jb20vaWRlbnRpdHkiLCJncmFudF90eXBlIjoidXJuOmlibTpwYXJhbXM6b2F1dGg6Z3JhbnQtdHlwZTphcGlrZXkiLCJzY29wZSI6ImlibSBvcGVuaWQiLCJjbGllbnRfaWQiOiJkZWZhdWx0IiwiYWNyIjoxLCJhbXIiOlsicHdkIl19.ZKnoQjFyXxXRtsP5cMfv0H1Measiz3Wd5D1srfV4i4QLRwHy6rR6X8up-xNT-O9tccWNo2z5fhPaihz-5n_qPbGnM3-CfZemTr0d9PnbmgKLejsUy3EywPu3Q87J1bjeE2XY0Zm7Sjf9w-TCyUHeFmbBGruv60rzQXXuUd802YInpAcvKaD3_QzVGHtZQTqGmohSWTF8y879B0TfDFD3R3g8GSUchl5ith3qqUGms3IWy8-DRNdkn53M9qMeRrOLAI36v8J-kZdNXbPoG86DiFThvHTNSZj_Sbc6Iiu2N-J9T6ygKNVDH_1tcPJckfAoStVstGugm0i3spun5HsE6w" # Replace with your actual API key | |
} | |
# Function to transcribe audio files | |
def transcribe_audio(file_path): | |
result = whisper_pipe(file_path) | |
return result['text'] | |
# Function to extract text and questions from PDF | |
def extract_text_from_pdf(pdf_path): | |
text = "" | |
questions = [] | |
with pdfplumber.open(pdf_path) as pdf: | |
for page in pdf.pages: | |
page_text = page.extract_text() | |
if page_text: | |
text += page_text | |
questions += [line.strip() for line in page_text.split("\n") if line.strip()] | |
return text, questions | |
# Function to generate form data with Granite | |
def generate_form_data(text, questions): | |
question_list = "\n".join(f"- {question}" for question in questions) | |
body = { | |
"input": f"""The following text is a transcript from an audio recording. Read the text and extract the information needed to fill out the following form.\n\nText: {text}\n\nForm Questions:\n{question_list}\n\nExtracted Form Data:""", | |
"parameters": { | |
"decoding_method": "sample", | |
"max_new_tokens": 900, | |
"temperature": 0.7, | |
"top_k": 50, | |
"top_p": 1, | |
"repetition_penalty": 1.05 | |
}, | |
"model_id": "ibm/granite-13b-chat-v2", | |
"project_id": "698f0da7-6b34-4642-8540-978e70e85c8e", # Replace with your actual project ID | |
"moderations": { | |
"hap": { | |
"input": { | |
"enabled": True, | |
"threshold": 0.5, | |
"mask": {"remove_entity_value": True} | |
}, | |
"output": { | |
"enabled": True, | |
"threshold": 0.5, | |
"mask": {"remove_entity_value": True} | |
} | |
} | |
} | |
} | |
response = requests.post(granite_url, headers=granite_headers, json=body) | |
if response.status_code != 200: | |
raise Exception("Non-200 response: " + str(response.text)) | |
data = response.json() | |
return data['results'][0]['generated_text'].strip() | |
# Function to save responses to PDF | |
def save_responses_to_pdf(responses, output_pdf_path): | |
document = SimpleDocTemplate(output_pdf_path, pagesize=letter) | |
styles = getSampleStyleSheet() | |
# Custom style for numbered responses | |
number_style = ParagraphStyle( | |
name='NumberedStyle', | |
parent=styles['BodyText'], | |
fontSize=10, | |
spaceAfter=12 | |
) | |
content = [] | |
for index, response in enumerate(responses, start=1): | |
# Add the response number and content | |
heading = Paragraph(f"<b>File {index}:</b>", styles['Heading2']) | |
response_text = Paragraph(response.replace("\n", "<br/>"), number_style) | |
content.append(heading) | |
content.append(Spacer(1, 6)) # Space between heading and response | |
content.append(response_text) | |
content.append(Spacer(1, 18)) # Space between responses | |
document.build(content) | |
# Set up the Streamlit app | |
st.title("FILL IT") | |
# Upload multiple audio files | |
uploaded_audios = st.file_uploader("Upload audio files", type=["wav", "mp3"], accept_multiple_files=True) | |
# Upload PDF file | |
uploaded_pdf = st.file_uploader("Upload a PDF file with questions", type=["pdf"]) | |
# Output box to display responses | |
output_box = st.empty() | |
# Button to start processing | |
if st.button("Start Processing"): | |
if uploaded_audios and uploaded_pdf: | |
responses = [] | |
# Read uploaded PDF file | |
pdf_bytes = uploaded_pdf.read() | |
with open(temp_pdf_path, "wb") as f: | |
f.write(pdf_bytes) | |
# Process each uploaded audio file | |
for audio_file in uploaded_audios: | |
audio_bytes = audio_file.read() | |
audio_path = os.path.join(temp_audio_folder, audio_file.name) | |
with open(audio_path, "wb") as f: | |
f.write(audio_bytes) | |
# Transcribe audio | |
transcription = transcribe_audio(audio_path) | |
# Extract text and questions from PDF | |
pdf_text, questions = extract_text_from_pdf(temp_pdf_path) | |
# Generate form data with Granite | |
form_data = generate_form_data(transcription, questions) | |
responses.append(form_data) | |
# Display responses in output box | |
output_box.write("Processing completed. Here are the results:") | |
for index, response in enumerate(responses, start=1): | |
output_box.write(f"File {index}:\n{response}\n") | |
# Save responses to PDF | |
save_responses_to_pdf(responses, temp_output_pdf_path) | |
# Button to download the PDF with responses | |
with open(temp_output_pdf_path, "rb") as f: | |
st.download_button( | |
label="Download Responses as PDF", | |
data=f, | |
file_name="response_output.pdf", | |
mime="application/pdf" | |
) | |
else: | |
st.warning("Please upload both audio files and a PDF file.") |