import os

import fitz  # PyMuPDF for PDF extraction
import gradio as gr
from huggingface_hub import InferenceClient


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Returns None (after logging the error) if the file cannot be opened
    or its pages cannot be read.
    """
    try:
        # Context manager guarantees the document handle is closed even if
        # a page read raises (the original leaked the open document).
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        print(f"Error extracting text from PDF: {str(e)}")
        return None


def extract_invoice_details_from_text(text, prompt, model_name, api_key):
    """Send *prompt* followed by *text* to the chosen chat model.

    Args:
        text: Text extracted from a PDF, appended to the prompt.
        prompt: Instruction prefix describing what to extract.
        model_name: Hugging Face model id selected by the user.
        api_key: Hugging Face API token used to authenticate the client.

    Returns:
        The model's reply as a string, or a fixed error message if the
        request fails for any reason.
    """
    try:
        # A fresh client per call keeps the user-supplied key scoped to
        # this request instead of being cached at module level.
        client = InferenceClient(api_key=api_key)
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt + text}],
            max_tokens=2000,  # Adjust token size as needed
        )
        # Attribute access is the documented way to read ChatCompletionOutput.
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error occurred while processing the request: {str(e)}")
        return "Error occurred while processing the request."


def process_files(files, prompt, model_name, api_key):
    """Extract invoice details from each uploaded PDF and join the results.

    Args:
        files: List of Gradio file objects (may be None if nothing was
            uploaded — Gradio passes None, not an empty list).
        prompt: Optional custom prompt; blank falls back to the default.
        model_name: Model id chosen in the dropdown.
        api_key: Hugging Face API token.

    Returns:
        One string with per-file results separated by blank lines, or a
        user-facing message when inputs are missing.
    """
    if not api_key.strip():
        return "Please provide a valid Hugging Face API key."
    if not files:
        # Without this guard the loop below raises TypeError on None.
        return "Please upload at least one PDF file."

    default_prompt = (
        "Can you please parse below details from attached documents in excel format?\n"
        "information to extract: DATE, NAME & ADDRESS OF BUYER, Item Code, HSN CODE, UOM, Qty, Unit Price\n\nInvoice text:\n"
    )
    user_prompt = prompt.strip() if prompt.strip() else default_prompt

    all_extracted_data = []
    for file in files:
        try:
            pdf_text = extract_text_from_pdf(file.name)
            if not pdf_text:
                all_extracted_data.append(f"Failed to extract text from {file.name}")
                continue
            # Get details from the model based on extracted text and the provided prompt
            extracted_text = extract_invoice_details_from_text(
                pdf_text, user_prompt, model_name, api_key
            )
            all_extracted_data.append(f"File: {file.name}\n{extracted_text.strip()}")
        except Exception as e:
            print(f"Error processing file {file.name}: {str(e)}")
            all_extracted_data.append(f"Error processing {file.name}: {str(e)}")

    return "\n\n".join(all_extracted_data)


# Define the Gradio app interface
with gr.Blocks() as app:
    gr.Markdown("# Information Parser App")
    gr.Markdown(
        "Upload PDF with text, provide a prompt, your Hugging Face API key, and select a model to extract details."
    )

    with gr.Row():
        file_input = gr.File(
            label="Upload PDF(s)", file_types=[".pdf"], file_count="multiple"
        )
        model_dropdown = gr.Dropdown(
            label="Select Model",
            choices=[
                "Qwen/Qwen2.5-Coder-32B-Instruct",
                "gpt-3.5-turbo",
                "other-model-name",
            ],
            value="Qwen/Qwen2.5-Coder-32B-Instruct",
        )

    prompt_input = gr.Textbox(
        label="Custom Prompt",
        placeholder="Enter your custom prompt here (leave blank to use default prompt).",
        lines=3,
    )
    api_key_input = gr.Textbox(
        label="Hugging Face API Key",
        placeholder="Enter your Hugging Face API key here.",
        type="password",
        lines=1,
    )

    extract_button = gr.Button("Extract Details from PDF")
    output_box = gr.Textbox(
        label="Extracted Data",
        placeholder="The extracted details will appear here.",
        lines=15,
        interactive=False,
    )

    extract_button.click(
        process_files,
        inputs=[file_input, prompt_input, model_dropdown, api_key_input],
        outputs=output_box,
    )


# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    app.launch()