import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import json
from paddleocr import PaddleOCR
from transformers import pipeline
import gradio as gr

# Initialize PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Predefined fields for extraction
FIELDS = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
          "Mobile Number", "Email", "Address", "Bank Account Details"]

def draw_boxes_on_image(image, data):
    """
    Draw bounding boxes and text on the image.
    
    Args:
        image (PIL Image): The input image.
        data (list): OCR results containing bounding boxes and detected text.
    
    Returns:
        PIL Image: The image with drawn boxes.
    """
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    for item_id, item in enumerate(data, start=1):
        bounding_box, (text, confidence) = item
        box = np.array(bounding_box).astype(int)
        draw.line([tuple(box[0]), tuple(box[1])], fill="green", width=2)
        draw.line([tuple(box[1]), tuple(box[2])], fill="green", width=2)
        draw.line([tuple(box[2]), tuple(box[3])], fill="green", width=2)
        draw.line([tuple(box[3]), tuple(box[0])], fill="green", width=2)
        text_position = (box[0][0], box[0][1] - 20)
        draw.text(text_position, f"{item_id}: {text} ({confidence:.2f})", fill="red", font=font)

    return image

def convert_to_json(results):
    """
    Converts the OCR results into a JSON object with bounding box IDs.
    
    Args:
        results (list): The list of OCR results containing bounding box coordinates, text, and confidence.
    
    Returns:
        dict: JSON data with bounding boxes and text.
    """
    json_data = []
    for item_id, result in enumerate(results, start=1):
        bounding_box = result[0]
        text = result[1][0]
        confidence = result[1][1]
        json_data.append({
            "id": item_id,
            "bounding_box": bounding_box,
            "text": text,
            "confidence": confidence
        })
    return json_data

def extract_field_value_pairs(text):
    """
    Extract field-value pairs from the text using a pre-trained NLP model.
    
    Args:
        text (str): The text to be processed.
    
    Returns:
        dict: A dictionary with field-value pairs.
    """
    nlp = pipeline("ner", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
    ner_results = []
    chunk_size = 256
    for i in range(0, len(text), chunk_size):
        chunk = text[i:i+chunk_size]
        ner_results.extend(nlp(chunk))
    
    field_value_pairs = {}
    current_field = None
    for entity in ner_results:
        word = entity['word']
        for field in FIELDS:
            if field.lower() in word.lower():
                current_field = field
                break
        if current_field and entity['entity'] == "LABEL_1":
            field_value_pairs[current_field] = word
    
    return field_value_pairs

def process_image(image):
    """
    Process the uploaded image and perform OCR.
    
    Args:
        image (PIL Image): The input image.
    
    Returns:
        tuple: The image with bounding boxes, OCR results in JSON format, and field-value pairs.
    """
    # Perform OCR on the image
    ocr_results = ocr.ocr(np.array(image), cls=True)
    
    # Draw boxes on the image
    image_with_boxes = draw_boxes_on_image(image.copy(), ocr_results[0])
    
    # Convert OCR results to JSON
    json_results = convert_to_json(ocr_results[0])
    json_results_path = 'ocr_results.json'  # Save in the root directory
    with open(json_results_path, "w") as f:
        json.dump(json_results, f, indent=4)
    
    # Extract field-value pairs from the text
    text = " ".join([result[1][0] for result in ocr_results[0]])
    field_value_pairs = extract_field_value_pairs(text)
    field_value_pairs_path = 'extracted_fields.json'  # Save in the root directory
    with open(field_value_pairs_path, "w") as f:
        json.dump(field_value_pairs, f, indent=4)
    
    return image_with_boxes, json_results_path, field_value_pairs_path

# Define Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil"),
        gr.File(label="Download OCR Results"),
        gr.File(label="Download Extracted Fields")
    ],
    live=True
)

if __name__ == "__main__":
    iface.launch()