import os

import fitz  # PyMuPDF for PDF extraction
import gradio as gr
from huggingface_hub import InferenceClient


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Returns None (after logging the error) if the file cannot be opened
    or its pages cannot be read.
    """
    try:
        # Context manager guarantees the document handle is closed even if
        # a page read raises (the original leaked the open document).
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        print(f"Error extracting text from PDF: {str(e)}")
        return None


def extract_invoice_details_from_text(text, prompt, model_name, api_key):
    """Send *prompt* followed by *text* to the chosen chat model.

    Args:
        text: Text extracted from a PDF, appended to the prompt.
        prompt: Instruction prefix describing what to extract.
        model_name: Hugging Face model id selected by the user.
        api_key: Hugging Face API token used to authenticate the client.

    Returns:
        The model's reply as a string, or a fixed error message if the
        request fails for any reason.
    """
    try:
        # A fresh client per call keeps the user-supplied key scoped to
        # this request instead of being cached at module level.
        client = InferenceClient(api_key=api_key)
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt + text}],
            max_tokens=2000,  # Adjust token size as needed
        )
        # Attribute access is the documented way to read ChatCompletionOutput.
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error occurred while processing the request: {str(e)}")
        return "Error occurred while processing the request."


def process_files(files, prompt, model_name, api_key):
    """Extract invoice details from each uploaded PDF and join the results.

    Args:
        files: List of Gradio file objects (may be None if nothing was
            uploaded — Gradio passes None, not an empty list).
        prompt: Optional custom prompt; blank falls back to the default.
        model_name: Model id chosen in the dropdown.
        api_key: Hugging Face API token.

    Returns:
        One string with per-file results separated by blank lines, or a
        user-facing message when inputs are missing.
    """
    if not api_key.strip():
        return "Please provide a valid Hugging Face API key."
    if not files:
        # Without this guard the loop below raises TypeError on None.
        return "Please upload at least one PDF file."

    default_prompt = (
        "Can you please parse below details from attached documents in excel format?\n"
        "information to extract: DATE, NAME & ADDRESS OF BUYER, Item Code, HSN CODE, UOM, Qty, Unit Price\n\nInvoice text:\n"
    )
    user_prompt = prompt.strip() if prompt.strip() else default_prompt

    all_extracted_data = []
    for file in files:
        try:
            pdf_text = extract_text_from_pdf(file.name)
            if not pdf_text:
                all_extracted_data.append(f"Failed to extract text from {file.name}")
                continue
            # Get details from the model based on extracted text and the provided prompt
            extracted_text = extract_invoice_details_from_text(
                pdf_text, user_prompt, model_name, api_key
            )
            all_extracted_data.append(f"File: {file.name}\n{extracted_text.strip()}")
        except Exception as e:
            print(f"Error processing file {file.name}: {str(e)}")
            all_extracted_data.append(f"Error processing {file.name}: {str(e)}")

    return "\n\n".join(all_extracted_data)


# Define the Gradio app interface
with gr.Blocks() as app:
    gr.Markdown("# Information Parser App")
    gr.Markdown(
        "Upload PDF with text, provide a prompt, your Hugging Face API key, and select a model to extract details."
    )

    with gr.Row():
        file_input = gr.File(
            label="Upload PDF(s)", file_types=[".pdf"], file_count="multiple"
        )
        model_dropdown = gr.Dropdown(
            label="Select Model",
            choices=[
                "Qwen/Qwen2.5-Coder-32B-Instruct",
                "gpt-3.5-turbo",
                "other-model-name",
            ],
            value="Qwen/Qwen2.5-Coder-32B-Instruct",
        )

    prompt_input = gr.Textbox(
        label="Custom Prompt",
        placeholder="Enter your custom prompt here (leave blank to use default prompt).",
        lines=3,
    )
    api_key_input = gr.Textbox(
        label="Hugging Face API Key",
        placeholder="Enter your Hugging Face API key here.",
        type="password",
        lines=1,
    )

    extract_button = gr.Button("Extract Details from PDF")
    output_box = gr.Textbox(
        label="Extracted Data",
        placeholder="The extracted details will appear here.",
        lines=15,
        interactive=False,
    )

    extract_button.click(
        process_files,
        inputs=[file_input, prompt_input, model_dropdown, api_key_input],
        outputs=output_box,
    )


# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    app.launch()