add commentary to all the code
- app.py +26 -11
- modules/OCR.py +143 -41
- modules/dataset_loader.py +166 -60
- modules/eval.py +391 -94
- modules/streamlit_utils.py +209 -48
- modules/toWizard.py +95 -14
- modules/toXML.py +330 -64
- modules/train.py +265 -240
- modules/utils.py +79 -196
app.py
CHANGED
@@ -6,75 +6,90 @@ import numpy as np
 from modules.streamlit_utils import *
 from modules.utils import error

-
 def main():
-
+    """
+    Main function to run the Streamlit application for BPMN AI model recognition.
+    """
+
+    # Check if the model is loaded in the session state
     if 'model_loaded' not in st.session_state:
         st.session_state.model_loaded = False

         st.session_state.first_run = True
+
+    # Configure the Streamlit page and retrieve screen details
     is_mobile, screen_width = configure_page()
+
+    # Display various UI components
     display_banner(is_mobile)
     display_title(is_mobile)
     display_sidebar()

+    # Initialize session state variables
     initialize_session_state()

     cropped_image = None

+    # Load example or user-uploaded image
     img_selected = load_example_image()
     uploaded_file = load_user_image(img_selected, is_mobile)

+    # Display the uploaded image and allow cropping
     if uploaded_file is not None:
         cropped_image = display_image(uploaded_file, screen_width, is_mobile)

+    # Set score threshold for prediction if an image is uploaded
     if uploaded_file is not None:
         get_score_threshold(is_mobile)

+    # Launch prediction when the button is clicked
     if st.button("🚀 Launch Prediction"):
         st.session_state.image = launch_prediction(cropped_image, st.session_state.score_threshold, is_mobile, screen_width)
         st.session_state.original_prediction = st.session_state.prediction.copy()
         st.rerun()

-    # Create placeholders for
+    # Create placeholders for different sections of the UI
     prediction_result_placeholder = st.empty()
     additional_options_placeholder = st.empty()
     modeler_placeholder = st.empty()

-
+    # Display prediction results and options if predictions are available
     if 'prediction' in st.session_state and uploaded_file:
         if st.session_state.image != cropped_image:
             print('Image has changed')
-            # Delete the prediction
+            # Delete the prediction if the image has changed
             del st.session_state.prediction
             return

-        if len(st.session_state.prediction['labels'])==0:
-            error("No prediction available. Please upload a BPMN image or decrease the detection score
+        if len(st.session_state.prediction['labels']) == 0:
+            error("No prediction available. Please upload a BPMN image or decrease the detection score threshold.")
         else:
             with prediction_result_placeholder.container():
                 if is_mobile:
-                    display_options(st.session_state.crop_image, st.session_state.score_threshold, is_mobile, int(5/6*screen_width))
+                    display_options(st.session_state.crop_image, st.session_state.score_threshold, is_mobile, int(5/6 * screen_width))
                 else:
                     with st.expander("Show result of prediction"):
-                        display_options(st.session_state.crop_image, st.session_state.score_threshold, is_mobile, int(5/6*screen_width))
+                        display_options(st.session_state.crop_image, st.session_state.score_threshold, is_mobile, int(5/6 * screen_width))

+            # Provide additional options for modification if not on mobile
             if not is_mobile:
                 with additional_options_placeholder.container():
                     state = modify_results()

-
+            # Display BPMN modeler options and result
             with modeler_placeholder.container():
                 modeler_options(is_mobile)
                 display_bpmn_modeler(is_mobile, screen_width)
     else:
+        # Clear placeholders if no predictions are available
         prediction_result_placeholder.empty()
         additional_options_placeholder.empty()
         modeler_placeholder.empty()
-        # Create
+        # Create space for scrolling
         for _ in range(50):
             st.text("")

+    # Force garbage collection
     gc.collect()

 if __name__ == "__main__":
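The app.py changes above lean on Streamlit's session state surviving reruns. As a reference, here is a minimal standalone sketch of that pattern; the names are purely illustrative and not the app's own helpers:

import streamlit as st

# One-time setup guarded by a flag that persists across reruns
if 'model_loaded' not in st.session_state:
    st.session_state.model_loaded = False  # flipped to True once a model is actually loaded
    st.session_state.first_run = True

placeholder = st.empty()  # reserved slot that can later be filled or cleared

if st.button("Run"):
    st.session_state.result = "some prediction"
    st.rerun()  # rerun the script so the updated state is rendered

if 'result' in st.session_state:
    placeholder.write(st.session_state.result)
else:
    placeholder.empty()  # clear the slot when there is nothing to show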
modules/OCR.py
CHANGED
@@ -3,13 +3,14 @@ import os
 from azure.ai.vision.imageanalysis import ImageAnalysisClient
 from azure.ai.vision.imageanalysis.models import VisualFeatures
 from azure.core.credentials import AzureKeyCredential
-import time
 import numpy as np
 import networkx as nx
 from modules.utils import class_dict, proportion_inside
 import json
 from modules.utils import rescale_boxes as rescale, is_vertical
-import
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import logging

 VISION_KEY = os.getenv("VISION_KEY")
 VISION_ENDPOINT = os.getenv("VISION_ENDPOINT")
@@ -20,15 +21,17 @@ VISION_ENDPOINT = os.getenv("VISION_ENDPOINT")

 VISION_KEY = json_data["VISION_KEY"]
 VISION_ENDPOINT = json_data["VISION_ENDPOINT"]"""
-
-
-import logging
+
+

 # Suppress specific warnings from transformers
 logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

 # Function to initialize the model and tokenizer
 def initialize_model():
+    """
+    Initialize the tokenizer and model for sentiment analysis.
+    """
     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
     return tokenizer, model
@@ -38,6 +41,17 @@ tokenizer, emotion_model = initialize_model()

 # Function to perform sentiment analysis and return the highest scoring emotion and its score between positive and negative
 def analyze_sentiment(sentence, tokenizer=tokenizer, model=emotion_model):
+    """
+    Analyze the sentiment of a given sentence using the initialized tokenizer and model.
+
+    Parameters:
+    - sentence (str): The input sentence to analyze.
+    - tokenizer (AutoTokenizer): The tokenizer for processing the sentence.
+    - model (AutoModelForSequenceClassification): The model for sentiment analysis.
+
+    Returns:
+    - tuple: The highest scoring emotion ('positive' or 'negative') and its corresponding score.
+    """
     inputs = tokenizer(sentence, return_tensors="pt")
     outputs = model(**inputs)
     probs = torch.nn.functional.softmax(outputs.logits, dim=-1).squeeze().tolist()
@@ -51,8 +65,16 @@ def analyze_sentiment(sentence, tokenizer=tokenizer, model=emotion_model):
     return highest_emotion, highest_score

 def sample_ocr_image_file(image_data):
-
-
+    """
+    Sample OCR function to analyze an image file and extract text using Azure's Computer Vision service.
+
+    Parameters:
+    - image_data (bytes): The image data in bytes.
+
+    Returns:
+    - result: The OCR result from the Computer Vision service.
+    """
+    # Set the values of your computer vision endpoint and computer vision key as environment variables:
     try:
         endpoint = VISION_ENDPOINT
         key = VISION_KEY
@@ -77,16 +99,35 @@ def sample_ocr_image_file(image_data):


 def text_prediction(image):
-
+    """
+    Perform OCR on an image to extract text.
+
+    Parameters:
+    - image: The image to process.
+
+    Returns:
+    - ocr_result: The OCR result.
+    """
+    # Transform the image into a byte array
     image.save('temp.jpg')
     with open('temp.jpg', 'rb') as f:
         image_data = f.read()
     ocr_result = sample_ocr_image_file(image_data)
-    #
+    # Delete the temporary image
     os.remove('temp.jpg')
     return ocr_result

 def filter_text(ocr_result, threshold=0.5):
+    """
+    Filter and process the OCR results to remove unwanted characters and low-confidence words.
+
+    Parameters:
+    - ocr_result: The OCR result.
+    - threshold (float): The confidence threshold for filtering words.
+
+    Returns:
+    - list_of_lines: Processed text lines and their bounding boxes.
+    """
     words_to_cancel = {"-","--","---","+",".",",","#","@","!","?","(",")","[","]","{","}","<",">","/","\\","|","-","_","=","&","^","%","$","£","€","¥","¢","¤","§","©","®","™","°","±","×","÷","¶","∆","∏","∑","∞","√","∫","≈","≠","≤","≥","≡","∼"}
     # Add every other one-letter word to the list of words to cancel, except 'I' and 'a'
     for letter in "bcdefghjklmnopqrstuvwxyz1234567890": # All lowercase letters except 'a'
@@ -132,10 +173,16 @@ def filter_text(ocr_result, threshold=0.5):
     return list_of_lines


-
-
+def get_box_points(box):
+    """
+    Returns all critical points of a box: corners and midpoints of edges.

+    Parameters:
+    - box (array): Bounding box coordinates [xmin, ymin, xmax, ymax].

+    Returns:
+    - numpy.array: Array of critical points.
+    """
     xmin, ymin, xmax, ymax = box
     return np.array([
         [xmin, ymin],  # Bottom-left corner
@@ -149,7 +196,16 @@ def get_box_points(box):
     ])

 def min_distance_between_boxes(box1, box2):
-    """
+    """
+    Computes the minimum distance between two boxes considering all critical points.
+
+    Parameters:
+    - box1 (array): First bounding box coordinates.
+    - box2 (array): Second bounding box coordinates.
+
+    Returns:
+    - float: The minimum distance between the two boxes.
+    """
     points1 = get_box_points(box1)
     points2 = get_box_points(box2)

@@ -162,7 +218,17 @@ def min_distance_between_boxes(box1, box2):
     return min_dist

 def are_close(box1, box2, threshold=50):
-    """
+    """
+    Determines if boxes are close based on their corners and center points.
+
+    Parameters:
+    - box1 (array): First bounding box coordinates.
+    - box2 (array): Second bounding box coordinates.
+    - threshold (int): Distance threshold for determining closeness.
+
+    Returns:
+    - bool: True if boxes are close, otherwise False.
+    """
     corners1 = np.array([
         [box1[0], box1[1]], [box1[0], box1[3]], [box1[2], box1[1]], [box1[2], box1[3]],
         [(box1[0]+box1[2])/2, box1[1]], [(box1[0]+box1[2])/2, box1[3]],
@@ -180,13 +246,25 @@ def are_close(box1, box2, threshold=50):
     return False

 def find_closest_box(text_box, all_boxes, labels, threshold, iou_threshold=0.5):
-    """
+    """
+    Find the closest box to the given text box within a specified threshold.
+
+    Parameters:
+    - text_box (array): The text box coordinates.
+    - all_boxes (list): List of all bounding boxes.
+    - labels (list): List of labels corresponding to the boxes.
+    - threshold (float): Distance threshold for determining closeness.
+    - iou_threshold (float): IoU threshold for determining if a text is inside a sequenceFlow.
+
+    Returns:
+    - int or None: Index of the closest box or None if no box is close enough.
+    """
     min_distance = float('inf')
     closest_index = None

-    #
+    # Check if the text is inside a sequenceFlow
     for j in range(len(all_boxes)):
-        if proportion_inside(text_box, all_boxes[j])>iou_threshold and labels[j] == list(class_dict.values()).index('sequenceFlow'):
+        if proportion_inside(text_box, all_boxes[j]) > iou_threshold and labels[j] == list(class_dict.values()).index('sequenceFlow'):
             return j

     for i, box in enumerate(all_boxes):
@@ -209,20 +287,32 @@ def find_closest_box(text_box, all_boxes, labels, threshold, iou_threshold=0.5):
     return None


-
 def group_texts(task_boxes, text_boxes, texts, min_dist=50, iou_threshold=0.8, percentage_thresh=0.8):
-    """
+    """
+    Maps text boxes to task boxes and groups texts within each task based on proximity.
+
+    Parameters:
+    - task_boxes (list): List of task bounding boxes.
+    - text_boxes (list): List of text bounding boxes.
+    - texts (list): List of texts corresponding to the text boxes.
+    - min_dist (float): Minimum distance threshold for grouping.
+    - iou_threshold (float): IoU threshold for determining if text is inside a task box.
+    - percentage_thresh (float): Percentage threshold for determining if text boxes are close.
+
+    Returns:
+    - tuple: Grouped task-related texts, their bounding boxes, grouped information texts, and their bounding boxes.
+    """
     G = nx.Graph()

     # Map each text box to the nearest task box
     task_to_texts = {i: [] for i in range(len(task_boxes))}
-    information_texts = [] #
+    information_texts = []  # Texts not inside any task box
     text_to_task_mapped = [False] * len(text_boxes)

     for idx, text_box in enumerate(text_boxes):
         mapped = False
         for jdx, task_box in enumerate(task_boxes):
-            if proportion_inside(text_box, task_box)>iou_threshold:
+            if proportion_inside(text_box, task_box) > iou_threshold:
                 task_to_texts[jdx].append(idx)
                 text_to_task_mapped[idx] = True
                 mapped = True
@@ -326,32 +416,45 @@ def group_texts(task_boxes, text_boxes, texts, min_dist=50, iou_threshold=0.8, p
     return all_grouped_texts, sentence_boxes, information_grouped_texts, info_sentence_boxes


-def mapping_text(full_pred, text_pred, print_sentences=False,percentage_thresh=0.6,scale=1.0, iou_threshold=0.5):
+def mapping_text(full_pred, text_pred, print_sentences=False, percentage_thresh=0.6, scale=1.0, iou_threshold=0.5):
+    """
+    Map the extracted texts to the predicted bounding boxes.

+    Parameters:
+    - full_pred (dict): Full prediction dictionary containing boxes, labels, BPMN IDs, and pool dictionary.
+    - text_pred (list): List containing text predictions and their bounding boxes.
+    - print_sentences (bool): Whether to print the sentences and their bounding boxes.
+    - percentage_thresh (float): Percentage threshold for determining closeness.
+    - scale (float): Scale factor for rescaling bounding boxes.
+    - iou_threshold (float): IoU threshold for determining if text is inside a bounding box.
+
+    Returns:
+    - dict: Text mapping for BPMN elements.
+    """
     boxes = rescale(scale, full_pred['boxes'])

     min_dist = 200
     labels = full_pred['labels']
     avoid = [list(class_dict.values()).index('pool'), list(class_dict.values()).index('lane'), list(class_dict.values()).index('sequenceFlow'), list(class_dict.values()).index('messageFlow'), list(class_dict.values()).index('dataAssociation')]
     for i in range(len(boxes)):
-
-
+        box1 = boxes[i]
+        if labels[i] in avoid:
+            continue
+        for j in range(i + 1, len(boxes)):
+            box2 = boxes[j]
+            if labels[j] in avoid:
                 continue
-
-
-        if labels[j] in avoid:
-            continue
-        dist = min_distance_between_boxes(box1, box2)
-        min_dist = min(min_dist, dist)
+            dist = min_distance_between_boxes(box1, box2)
+            min_dist = min(min_dist, dist)

-    #
+    # Print the minimum distance between boxes
+    # print("Minimum distance between boxes:", min_dist)

     text_pred[0] = rescale(scale, text_pred[0])
     task_boxes = [box for i, box in enumerate(boxes) if full_pred['labels'][i] == list(class_dict.values()).index('task')]
     grouped_sentences, sentence_bounding_boxes, info_texts, info_boxes = group_texts(task_boxes, text_pred[0], text_pred[1], min_dist=min_dist)
     BPMN_id = set(full_pred['BPMN_id']) # This ensures uniqueness of task names
     text_mapping = {id: '' for id in BPMN_id}
-

     if print_sentences:
         for sentence, box in zip(grouped_sentences, sentence_bounding_boxes):
@@ -363,8 +466,8 @@ def mapping_text(full_pred, text_pred, print_sentences=False,percentage_thresh=0
     # Map the grouped sentences to the corresponding task
     for i in range(len(sentence_bounding_boxes)):
         for j in range(len(boxes)):
-            if proportion_inside(sentence_bounding_boxes[i], boxes[j])>iou_threshold and full_pred['labels'][j] == list(class_dict.values()).index('task'):
-                text_mapping[full_pred['BPMN_id'][j]]=grouped_sentences[i]
+            if proportion_inside(sentence_bounding_boxes[i], boxes[j]) > iou_threshold and full_pred['labels'][j] == list(class_dict.values()).index('task'):
+                text_mapping[full_pred['BPMN_id'][j]] = grouped_sentences[i]

     # Map the grouped sentences to the corresponding pool
     for key, elements in full_pred['pool_dict'].items():
@@ -372,17 +475,16 @@ def mapping_text(full_pred, text_pred, print_sentences=False,percentage_thresh=0
             continue
         else:
             for i in range(len(info_boxes)):
-                #
+                # Find the position of the key in BPMN_id
                 position = list(full_pred['BPMN_id']).index(key)
-                if proportion_inside(info_boxes[i], boxes[position])>iou_threshold:
+                if proportion_inside(info_boxes[i], boxes[position]) > iou_threshold:
                     text_mapping[key] = info_texts[i]
                     info_texts[i] = '' # Clear the text to avoid re-use

-
     for i in range(len(info_boxes)):
         if is_vertical(info_boxes[i]):
             for j in range(len(boxes)):
-                if proportion_inside(info_boxes[i], boxes[j])>0 and full_pred['labels'][j] == list(class_dict.values()).index('pool'):
+                if proportion_inside(info_boxes[i], boxes[j]) > 0 and full_pred['labels'][j] == list(class_dict.values()).index('pool'):
                     print("Text:", info_texts[i], "associate with ", full_pred['BPMN_id'][j])
                     bpmn_id = full_pred['BPMN_id'][j]
                     # Append new text or create new entry if not existing
@@ -399,10 +501,10 @@ def mapping_text(full_pred, text_pred, print_sentences=False,percentage_thresh=0
         for j in range(len(boxes)):
             if info_texts[i] == '':
                 continue # Skip if there's no text
-            if (proportion_inside(info_boxes[i], boxes[j])>0 or are_close(info_boxes[i], boxes[j], threshold=percentage_thresh*min_dist)) and (full_pred['labels'][j] == list(class_dict.values()).index('event')
+            if (proportion_inside(info_boxes[i], boxes[j]) > 0 or are_close(info_boxes[i], boxes[j], threshold=percentage_thresh * min_dist)) and (full_pred['labels'][j] == list(class_dict.values()).index('event')
                     or full_pred['labels'][j] == list(class_dict.values()).index('messageEvent')
                     or full_pred['labels'][j] == list(class_dict.values()).index('timerEvent')
-                    or full_pred['labels'][j] == list(class_dict.values()).index('dataObject'))
+                    or full_pred['labels'][j] == list(class_dict.values()).index('dataObject')):
                 bpmn_id = full_pred['BPMN_id'][j]
                 # Append new text or create new entry if not existing
                 if bpmn_id in text_mapping:
@@ -416,7 +518,7 @@ def mapping_text(full_pred, text_pred, print_sentences=False,percentage_thresh=0
         if info_texts[i] == '' or is_vertical(info_boxes[i]):
             continue # Skip if there's no text
         # Find the closest box within the defined threshold
-        closest_index = find_closest_box(info_boxes[i], boxes, full_pred['labels'], threshold=4*min_dist)
+        closest_index = find_closest_box(info_boxes[i], boxes, full_pred['labels'], threshold=4 * min_dist)
         if closest_index is not None and (full_pred['labels'][closest_index] == list(class_dict.values()).index('sequenceFlow') or full_pred['labels'][closest_index] == list(class_dict.values()).index('messageFlow')):
             bpmn_id = full_pred['BPMN_id'][closest_index]
             # Append new text or create new entry if not existing
@@ -430,4 +532,4 @@ def mapping_text(full_pred, text_pred, print_sentences=False,percentage_thresh=0
         print("Text Mapping:", text_mapping)
         print("Information Texts left:", info_texts)

-    return text_mapping
+    return text_mapping
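The new docstrings above describe get_box_points and min_distance_between_boxes as sampling box corners and edge midpoints. A self-contained NumPy sketch of that idea, with illustrative names rather than the module's API:

import numpy as np

def box_points(box):
    # Corners and edge midpoints of an [xmin, ymin, xmax, ymax] box
    xmin, ymin, xmax, ymax = box
    return np.array([
        [xmin, ymin], [xmin, ymax], [xmax, ymin], [xmax, ymax],
        [(xmin + xmax) / 2, ymin], [(xmin + xmax) / 2, ymax],
        [xmin, (ymin + ymax) / 2], [xmax, (ymin + ymax) / 2],
    ])

def min_box_distance(box1, box2):
    # Smallest pairwise distance between the sampled points of the two boxes
    p1, p2 = box_points(box1), box_points(box2)
    return float(np.min(np.linalg.norm(p1[:, None, :] - p2[None, :, :], axis=-1)))

print(min_box_distance([0, 0, 10, 10], [15, 0, 25, 10]))  # 5.0 for two boxes 5 px apart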
modules/dataset_loader.py
CHANGED
@@ -1,7 +1,3 @@
|
|
1 |
-
from torchvision.models.detection import keypointrcnn_resnet50_fpn
|
2 |
-
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
3 |
-
from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor
|
4 |
-
from torchvision.models.detection import KeypointRCNN_ResNet50_FPN_Weights
|
5 |
import random
|
6 |
import torch
|
7 |
from torch.utils.data import Dataset
|
@@ -9,43 +5,60 @@ import torchvision.transforms.functional as F
|
|
9 |
import numpy as np
|
10 |
from torch.utils.data.dataloader import default_collate
|
11 |
import cv2
|
12 |
-
|
13 |
-
from torch.utils.data import DataLoader, Subset, ConcatDataset
|
14 |
-
import streamlit as st
|
15 |
from modules.utils import object_dict, arrow_dict, resize_boxes, resize_keypoints
|
|
|
|
|
16 |
|
17 |
class RandomCrop:
|
18 |
-
def __init__(self, new_size=(1333,800),crop_fraction=0.5, min_objects=4):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
self.crop_fraction = crop_fraction
|
20 |
self.min_objects = min_objects
|
21 |
self.new_size = new_size
|
22 |
|
23 |
def __call__(self, image, target):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
new_w1, new_h1 = self.new_size
|
25 |
w, h = image.size
|
26 |
new_w = int(w * self.crop_fraction)
|
27 |
-
new_h = int(new_w*new_h1/new_w1)
|
28 |
-
|
29 |
-
i=0
|
30 |
-
for i in range(4):
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
|
38 |
-
if new_h >= h:
|
39 |
-
|
40 |
|
41 |
boxes = target["boxes"]
|
42 |
if 'keypoints' in target:
|
43 |
keypoints = target["keypoints"]
|
44 |
else:
|
45 |
keypoints = []
|
46 |
-
for
|
47 |
-
keypoints.append(torch.zeros((2,3)))
|
48 |
-
|
49 |
|
50 |
# Attempt to find a suitable crop region
|
51 |
success = False
|
@@ -82,7 +95,7 @@ class RandomCrop:
|
|
82 |
class RandomFlip:
|
83 |
def __init__(self, h_flip_prob=0.5, v_flip_prob=0.5):
|
84 |
"""
|
85 |
-
|
86 |
|
87 |
Parameters:
|
88 |
- h_flip_prob (float): Probability of applying a horizontal flip to the image.
|
@@ -93,7 +106,7 @@ class RandomFlip:
|
|
93 |
|
94 |
def __call__(self, image, target):
|
95 |
"""
|
96 |
-
|
97 |
|
98 |
Parameters:
|
99 |
- image (PIL Image): The image to be flipped.
|
@@ -143,12 +156,12 @@ class RandomFlip:
|
|
143 |
target['keypoints'] = torch.stack(new_keypoints)
|
144 |
|
145 |
return image, target
|
146 |
-
|
147 |
|
148 |
class RandomRotate:
|
149 |
def __init__(self, max_rotate_deg=20, rotate_proba=0.3):
|
150 |
"""
|
151 |
-
|
152 |
|
153 |
Parameters:
|
154 |
- max_rotate_deg (int): Maximum degree to rotate the image.
|
@@ -159,7 +172,7 @@ class RandomRotate:
|
|
159 |
|
160 |
def __call__(self, image, target):
|
161 |
"""
|
162 |
-
Randomly
|
163 |
|
164 |
Parameters:
|
165 |
- image (PIL Image): The image to be rotated.
|
@@ -170,7 +183,7 @@ class RandomRotate:
|
|
170 |
"""
|
171 |
if random.random() < self.rotate_proba:
|
172 |
angle = random.uniform(-self.max_rotate_deg, self.max_rotate_deg)
|
173 |
-
image = F.rotate(image, angle, expand=False, fill=
|
174 |
|
175 |
# Rotate bounding boxes
|
176 |
w, h = image.size
|
@@ -194,7 +207,16 @@ class RandomRotate:
|
|
194 |
|
195 |
def rotate_box(self, box, angle, cx, cy):
|
196 |
"""
|
197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
"""
|
199 |
x1, y1, x2, y2 = box
|
200 |
corners = torch.tensor([
|
@@ -214,7 +236,16 @@ class RandomRotate:
|
|
214 |
|
215 |
def rotate_keypoints(self, keypoints, angle, cx, cy):
|
216 |
"""
|
217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
"""
|
219 |
new_keypoints = []
|
220 |
for kp in keypoints:
|
@@ -226,50 +257,89 @@ class RandomRotate:
|
|
226 |
return torch.stack(new_keypoints)
|
227 |
|
228 |
def rotate_90_box(box, angle, w, h):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
x1, y1, x2, y2 = box
|
230 |
if angle == 90:
|
231 |
-
return torch.tensor([y1,h-x2,y2,h-x1])
|
232 |
elif angle == 270 or angle == -90:
|
233 |
-
return torch.tensor([w-y2,x1,w-y1,x2])
|
234 |
else:
|
235 |
print("angle not supported")
|
236 |
|
237 |
def rotate_90_keypoints(kp, angle, w, h):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
# Extract coordinates and visibility from each keypoint tensor
|
239 |
x1, y1, v1 = kp[0][0], kp[0][1], kp[0][2]
|
240 |
x2, y2, v2 = kp[1][0], kp[1][1], kp[1][2]
|
241 |
# Swap x and y coordinates for each keypoint
|
242 |
if angle == 90:
|
243 |
-
new = [[y1, h-x1, v1], [y2, h-x2, v2]]
|
244 |
elif angle == 270 or angle == -90:
|
245 |
-
new = [[w-y1, x1, v1], [w-y2, x2, v2]]
|
246 |
|
247 |
return torch.tensor(new, dtype=torch.float32)
|
248 |
-
|
249 |
|
250 |
def rotate_vertical(image, target):
|
251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
new_boxes = []
|
253 |
-
angle = random.choice([-90,90])
|
254 |
image = F.rotate(image, angle, expand=True, fill=200)
|
255 |
for box in target["boxes"]:
|
256 |
new_box = rotate_90_box(box, angle, image.size[0], image.size[1])
|
257 |
new_boxes.append(new_box)
|
258 |
target["boxes"] = torch.stack(new_boxes)
|
259 |
-
|
260 |
if 'keypoints' in target:
|
261 |
-
new_kp = []
|
262 |
-
for kp in target['keypoints']:
|
263 |
new_key = rotate_90_keypoints(kp, angle, image.size[0], image.size[1])
|
264 |
new_kp.append(new_key)
|
265 |
target['keypoints'] = torch.stack(new_kp)
|
266 |
return image, target
|
267 |
|
|
|
|
|
|
|
268 |
|
269 |
-
|
270 |
-
|
|
|
|
|
271 |
|
272 |
-
|
|
|
|
|
273 |
original_size = image.size
|
274 |
# Calculate scale to fit the new size while maintaining aspect ratio
|
275 |
scale = min(new_size[0] / original_size[0], new_size[1] / original_size[1])
|
@@ -302,8 +372,24 @@ def resize_and_pad(image, target, new_size=(1333, 800)):
|
|
302 |
return image, target
|
303 |
|
304 |
class BPMN_Dataset(Dataset):
|
305 |
-
def __init__(self, annotations, transform=None, crop_transform=None, crop_prob=0.3, rotate_90_proba=0.2,
|
306 |
-
flip_transform=None, rotate_transform=None, new_size=(1333,1333), keep_ratio=0.1, resize=True, model_type='object'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
307 |
self.annotations = annotations
|
308 |
print(f"Loaded {len(self.annotations)} annotations.")
|
309 |
self.transform = transform
|
@@ -322,15 +408,30 @@ class BPMN_Dataset(Dataset):
|
|
322 |
self.rotate_90_proba = rotate_90_proba
|
323 |
|
324 |
def __len__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
return len(self.annotations)
|
326 |
|
327 |
def __getitem__(self, idx):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
annotation = self.annotations[idx]
|
329 |
image = annotation.img.convert("RGB")
|
330 |
boxes = torch.tensor(np.array(annotation.boxes_ltrb), dtype=torch.float32)
|
331 |
labels_names = [ann for ann in annotation.categories]
|
332 |
|
333 |
-
# Only keep the labels, boxes and keypoints that are in the class_dict
|
334 |
kept_indices = [i for i, ann in enumerate(annotation.categories) if ann in self.dict.values()]
|
335 |
boxes = boxes[kept_indices]
|
336 |
labels_names = [ann for i, ann in enumerate(labels_names) if i in kept_indices]
|
@@ -351,7 +452,7 @@ class BPMN_Dataset(Dataset):
|
|
351 |
if ann.category in ["sequenceFlow", "messageFlow", "dataAssociation"]:
|
352 |
# Fill the keypoints tensor for this annotation, mark as visible (1)
|
353 |
kp = np.array(ann.keypoints, dtype=np.float32).reshape(-1, 3)
|
354 |
-
kp = kp[
|
355 |
visible = np.ones((kp.shape[0], 1), dtype=np.float32)
|
356 |
kp = np.hstack([kp, visible])
|
357 |
keypoints[ii, :kp.shape[0], :] = torch.tensor(kp, dtype=torch.float32)
|
@@ -359,17 +460,17 @@ class BPMN_Dataset(Dataset):
|
|
359 |
|
360 |
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
|
361 |
|
362 |
-
if self.model_type == 'object':
|
363 |
target = {
|
364 |
"boxes": boxes,
|
365 |
"labels": labels_id,
|
366 |
-
#"area": area,
|
367 |
}
|
368 |
elif self.model_type == 'arrow':
|
369 |
target = {
|
370 |
"boxes": boxes,
|
371 |
"labels": labels_id,
|
372 |
-
#"area": area,
|
373 |
"keypoints": keypoints,
|
374 |
}
|
375 |
|
@@ -384,7 +485,7 @@ class BPMN_Dataset(Dataset):
|
|
384 |
# Randomly apply the custom cropping transform
|
385 |
if self.crop_transform and random.random() < self.crop_prob:
|
386 |
image, target = self.crop_transform(image, target)
|
387 |
-
|
388 |
# Rotate vertical image
|
389 |
if random.random() < self.rotate_90_proba:
|
390 |
image, target = rotate_vertical(image, target)
|
@@ -394,12 +495,12 @@ class BPMN_Dataset(Dataset):
|
|
394 |
# Center and pad the image while keeping the aspect ratio
|
395 |
image, target = resize_and_pad(image, target, self.new_size)
|
396 |
else:
|
397 |
-
target['boxes'] = resize_boxes(target['boxes'], (image.size[0],image.size[1]), self.new_size)
|
398 |
if 'area' in target:
|
399 |
target['area'] = (target['boxes'][:, 3] - target['boxes'][:, 1]) * (target['boxes'][:, 2] - target['boxes'][:, 0])
|
400 |
if 'keypoints' in target:
|
401 |
for i in range(len(target['keypoints'])):
|
402 |
-
target['keypoints'][i] = resize_keypoints(target['keypoints'][i], (image.size[0],image.size[1]), self.new_size)
|
403 |
image = F.resize(image, (self.new_size[1], self.new_size[0]))
|
404 |
|
405 |
return self.transform(image), target
|
@@ -429,15 +530,15 @@ def collate_fn(batch):
|
|
429 |
return images, targets
|
430 |
|
431 |
|
432 |
-
|
433 |
-
def create_loader(new_size,transformation, annotations1, annotations2=None,
|
434 |
batch_size=4, crop_prob=0.2, crop_fraction=0.7, min_objects=3,
|
435 |
h_flip_prob=0.3, v_flip_prob=0.3, max_rotate_deg=20, rotate_90_proba=0.2, rotate_proba=0.3,
|
436 |
-
seed=42, resize=True, keep_ratio=0.1, model_type
|
437 |
"""
|
438 |
-
|
439 |
|
440 |
Parameters:
|
|
|
441 |
- transformation (callable): Transformation function to apply to each image (e.g., normalization).
|
442 |
- annotations1 (list): Primary list of annotations.
|
443 |
- annotations2 (list, optional): Secondary list of annotations to concatenate with the first.
|
@@ -447,15 +548,20 @@ def create_loader(new_size,transformation, annotations1, annotations2=None,
|
|
447 |
- min_objects (int): Minimum number of objects required to be within the crop.
|
448 |
- h_flip_prob (float): Probability of applying horizontal flip.
|
449 |
- v_flip_prob (float): Probability of applying vertical flip.
|
|
|
|
|
|
|
450 |
- seed (int): Seed for random number generators for reproducibility.
|
451 |
- resize (bool): Flag indicating whether to resize images after transformations.
|
|
|
|
|
452 |
|
453 |
Returns:
|
454 |
- DataLoader: Configured data loader for the dataset.
|
455 |
"""
|
456 |
|
457 |
# Initialize custom transformations for cropping and flipping
|
458 |
-
custom_crop_transform = RandomCrop(new_size,crop_fraction, min_objects)
|
459 |
custom_flip_transform = RandomFlip(h_flip_prob, v_flip_prob)
|
460 |
custom_rotate_transform = RandomRotate(max_rotate_deg, rotate_proba)
|
461 |
|
@@ -497,4 +603,4 @@ def create_loader(new_size,transformation, annotations1, annotations2=None,
|
|
497 |
# Create the DataLoader with the dataset
|
498 |
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
|
499 |
|
500 |
-
return data_loader
|
|
|
|
|
|
|
|
|
|
|
1 |
import random
|
2 |
import torch
|
3 |
from torch.utils.data import Dataset
|
|
|
5 |
import numpy as np
|
6 |
from torch.utils.data.dataloader import default_collate
|
7 |
import cv2
|
8 |
+
from torch.utils.data import Dataset, DataLoader, Subset, ConcatDataset
|
|
|
|
|
9 |
from modules.utils import object_dict, arrow_dict, resize_boxes, resize_keypoints
|
10 |
+
import torchvision.transforms.functional as F
|
11 |
+
import torch
|
12 |
|
13 |
class RandomCrop:
|
14 |
+
def __init__(self, new_size=(1333, 800), crop_fraction=0.5, min_objects=4):
|
15 |
+
"""
|
16 |
+
Initialize the RandomCrop transformation.
|
17 |
+
|
18 |
+
Parameters:
|
19 |
+
- new_size (tuple): The target size for the image after cropping.
|
20 |
+
- crop_fraction (float): The fraction of the original width to use when cropping.
|
21 |
+
- min_objects (int): Minimum number of objects required to be within the crop.
|
22 |
+
"""
|
23 |
self.crop_fraction = crop_fraction
|
24 |
self.min_objects = min_objects
|
25 |
self.new_size = new_size
|
26 |
|
27 |
def __call__(self, image, target):
|
28 |
+
"""
|
29 |
+
Apply the RandomCrop transformation to the image and its target.
|
30 |
+
|
31 |
+
Parameters:
|
32 |
+
- image (PIL Image): The image to be cropped.
|
33 |
+
- target (dict): The target dictionary containing 'boxes' and optional 'keypoints'.
|
34 |
+
|
35 |
+
Returns:
|
36 |
+
- PIL Image, dict: The cropped image and its updated target dictionary.
|
37 |
+
"""
|
38 |
new_w1, new_h1 = self.new_size
|
39 |
w, h = image.size
|
40 |
new_w = int(w * self.crop_fraction)
|
41 |
+
new_h = int(new_w * new_h1 / new_w1)
|
42 |
+
|
43 |
+
i = 0
|
44 |
+
for i in range(4): # Try 4 times to adjust new_w and new_h if new_h >= h
|
45 |
+
if new_h >= h:
|
46 |
+
i += 0.05
|
47 |
+
new_w = int(w * (self.crop_fraction - i))
|
48 |
+
new_h = int(new_w * new_h1 / new_w1)
|
49 |
+
if new_h < h:
|
50 |
+
continue
|
51 |
|
52 |
+
if new_h >= h: # If still not valid, return original image and target
|
53 |
+
return image, target
|
54 |
|
55 |
boxes = target["boxes"]
|
56 |
if 'keypoints' in target:
|
57 |
keypoints = target["keypoints"]
|
58 |
else:
|
59 |
keypoints = []
|
60 |
+
for _ in range(len(boxes)):
|
61 |
+
keypoints.append(torch.zeros((2, 3)))
|
|
|
62 |
|
63 |
# Attempt to find a suitable crop region
|
64 |
success = False
|
|
|
95 |
class RandomFlip:
|
96 |
def __init__(self, h_flip_prob=0.5, v_flip_prob=0.5):
|
97 |
"""
|
98 |
+
Initialize the RandomFlip transformation with probabilities for flipping.
|
99 |
|
100 |
Parameters:
|
101 |
- h_flip_prob (float): Probability of applying a horizontal flip to the image.
|
|
|
106 |
|
107 |
def __call__(self, image, target):
|
108 |
"""
|
109 |
+
Apply random horizontal and/or vertical flip to the image and updates target data accordingly.
|
110 |
|
111 |
Parameters:
|
112 |
- image (PIL Image): The image to be flipped.
|
|
|
156 |
target['keypoints'] = torch.stack(new_keypoints)
|
157 |
|
158 |
return image, target
|
159 |
+
|
160 |
|
161 |
class RandomRotate:
|
162 |
def __init__(self, max_rotate_deg=20, rotate_proba=0.3):
|
163 |
"""
|
164 |
+
Initialize the RandomRotate transformation with a maximum rotation angle and probability of rotating.
|
165 |
|
166 |
Parameters:
|
167 |
- max_rotate_deg (int): Maximum degree to rotate the image.
|
|
|
172 |
|
173 |
def __call__(self, image, target):
|
174 |
"""
|
175 |
+
Randomly rotate the image and updates the target data accordingly.
|
176 |
|
177 |
Parameters:
|
178 |
- image (PIL Image): The image to be rotated.
|
|
|
183 |
"""
|
184 |
if random.random() < self.rotate_proba:
|
185 |
angle = random.uniform(-self.max_rotate_deg, self.max_rotate_deg)
|
186 |
+
image = F.rotate(image, angle, expand=False, fill=255)
|
187 |
|
188 |
# Rotate bounding boxes
|
189 |
w, h = image.size
|
|
|
207 |
|
208 |
def rotate_box(self, box, angle, cx, cy):
|
209 |
"""
|
210 |
+
Rotate a bounding box by a given angle around the center of the image.
|
211 |
+
|
212 |
+
Parameters:
|
213 |
+
- box (tensor): The bounding box to be rotated.
|
214 |
+
- angle (float): The angle to rotate the box.
|
215 |
+
- cx (float): The x-coordinate of the image center.
|
216 |
+
- cy (float): The y-coordinate of the image center.
|
217 |
+
|
218 |
+
Returns:
|
219 |
+
- tensor: The rotated bounding box.
|
220 |
"""
|
221 |
x1, y1, x2, y2 = box
|
222 |
corners = torch.tensor([
|
|
|
236 |
|
237 |
def rotate_keypoints(self, keypoints, angle, cx, cy):
|
238 |
"""
|
239 |
+
Rotate keypoints by a given angle around the center of the image.
|
240 |
+
|
241 |
+
Parameters:
|
242 |
+
- keypoints (tensor): The keypoints to be rotated.
|
243 |
+
- angle (float): The angle to rotate the keypoints.
|
244 |
+
- cx (float): The x-coordinate of the image center.
|
245 |
+
- cy (float): The y-coordinate of the image center.
|
246 |
+
|
247 |
+
Returns:
|
248 |
+
- tensor: The rotated keypoints.
|
249 |
"""
|
250 |
new_keypoints = []
|
251 |
for kp in keypoints:
|
|
|
257 |
return torch.stack(new_keypoints)
|
258 |
|
259 |
def rotate_90_box(box, angle, w, h):
|
260 |
+
"""
|
261 |
+
Rotate a bounding box by 90 degrees.
|
262 |
+
|
263 |
+
Parameters:
|
264 |
+
- box (tensor): The bounding box to be rotated.
|
265 |
+
- angle (int): The angle to rotate the box (90 or -90 degrees).
|
266 |
+
- w (int): The width of the image.
|
267 |
+
- h (int): The height of the image.
|
268 |
+
|
269 |
+
Returns:
|
270 |
+
- tensor: The rotated bounding box.
|
271 |
+
"""
|
272 |
x1, y1, x2, y2 = box
|
273 |
if angle == 90:
|
274 |
+
return torch.tensor([y1, h - x2, y2, h - x1])
|
275 |
elif angle == 270 or angle == -90:
|
276 |
+
return torch.tensor([w - y2, x1, w - y1, x2])
|
277 |
else:
|
278 |
print("angle not supported")
|
279 |
|
280 |
def rotate_90_keypoints(kp, angle, w, h):
|
281 |
+
"""
|
282 |
+
Rotate keypoints by 90 degrees.
|
283 |
+
|
284 |
+
Parameters:
|
285 |
+
- kp (tensor): The keypoints to be rotated.
|
286 |
+
- angle (int): The angle to rotate the keypoints (90 or -90 degrees).
|
287 |
+
- w (int): The width of the image.
|
288 |
+
- h (int): The height of the image.
|
289 |
+
|
290 |
+
Returns:
|
291 |
+
- tensor: The rotated keypoints.
|
292 |
+
"""
|
293 |
# Extract coordinates and visibility from each keypoint tensor
|
294 |
x1, y1, v1 = kp[0][0], kp[0][1], kp[0][2]
|
295 |
x2, y2, v2 = kp[1][0], kp[1][1], kp[1][2]
|
296 |
# Swap x and y coordinates for each keypoint
|
297 |
if angle == 90:
|
298 |
+
new = [[y1, h - x1, v1], [y2, h - x2, v2]]
|
299 |
elif angle == 270 or angle == -90:
|
300 |
+
new = [[w - y1, x1, v1], [w - y2, x2, v2]]
|
301 |
|
302 |
return torch.tensor(new, dtype=torch.float32)
|
|
|
303 |
|
304 |
def rotate_vertical(image, target):
|
305 |
+
"""
|
306 |
+
Rotate the image and target if the image is vertical.
|
307 |
+
|
308 |
+
Parameters:
|
309 |
+
- image (PIL Image): The image to be rotated.
|
310 |
+
- target (dict): The target dictionary containing 'boxes' and 'keypoints'.
|
311 |
+
|
312 |
+
Returns:
|
313 |
+
- PIL Image, dict: The rotated image and its updated target dictionary.
|
314 |
+
"""
|
315 |
new_boxes = []
|
316 |
+
angle = random.choice([-90, 90])
|
317 |
image = F.rotate(image, angle, expand=True, fill=200)
|
318 |
for box in target["boxes"]:
|
319 |
new_box = rotate_90_box(box, angle, image.size[0], image.size[1])
|
320 |
new_boxes.append(new_box)
|
321 |
target["boxes"] = torch.stack(new_boxes)
|
322 |
+
|
323 |
if 'keypoints' in target:
|
324 |
+
new_kp = []
|
325 |
+
for kp in target['keypoints']:
|
326 |
new_key = rotate_90_keypoints(kp, angle, image.size[0], image.size[1])
|
327 |
new_kp.append(new_key)
|
328 |
target['keypoints'] = torch.stack(new_kp)
|
329 |
return image, target
|
330 |
|
331 |
+
def resize_and_pad(image, target, new_size=(1333, 800)):
|
332 |
+
"""
|
333 |
+
Resize and pad the image and target to the specified new size while maintaining the aspect ratio.
|
334 |
|
335 |
+
Parameters:
|
336 |
+
- image (PIL Image): The image to be resized and padded.
|
337 |
+
- target (dict): The target dictionary containing 'boxes' and optional 'keypoints'.
|
338 |
+
- new_size (tuple): The target size for the image after resizing and padding.
|
339 |
|
340 |
+
Returns:
|
341 |
+
- PIL Image, dict: The resized and padded image and its updated target dictionary.
|
342 |
+
"""
|
343 |
original_size = image.size
|
344 |
# Calculate scale to fit the new size while maintaining aspect ratio
|
345 |
scale = min(new_size[0] / original_size[0], new_size[1] / original_size[1])
|
|
|
372 |
return image, target
|
373 |
|
374 |
class BPMN_Dataset(Dataset):
|
375 |
+
def __init__(self, annotations, transform=None, crop_transform=None, crop_prob=0.3, rotate_90_proba=0.2,
|
376 |
+
flip_transform=None, rotate_transform=None, new_size=(1333, 1333), keep_ratio=0.1, resize=True, model_type='object'):
|
377 |
+
"""
|
378 |
+
Initialize the BPMN_Dataset with annotations and optional transformations.
|
379 |
+
|
380 |
+
Parameters:
|
381 |
+
- annotations (list): List of annotations for the dataset.
|
382 |
+
- transform (callable, optional): Transformation function to apply to each image.
|
383 |
+
- crop_transform (callable, optional): Custom cropping transformation.
|
384 |
+
- crop_prob (float): Probability of applying the crop transformation.
|
385 |
+
- rotate_90_proba (float): Probability of rotating the image by 90 degrees.
|
386 |
+
- flip_transform (callable, optional): Custom flipping transformation.
|
387 |
+
- rotate_transform (callable, optional): Custom rotation transformation.
|
388 |
+
- new_size (tuple): Target size for the images.
|
389 |
+
- keep_ratio (float): Probability of keeping the aspect ratio during resizing.
|
390 |
+
- resize (bool): Flag indicating whether to resize images after transformations.
|
391 |
+
- model_type (str): Type of model ('object' or 'arrow') to determine the target dictionary.
|
392 |
+
"""
|
393 |
self.annotations = annotations
|
394 |
print(f"Loaded {len(self.annotations)} annotations.")
|
395 |
self.transform = transform
|
|
|
408 |
self.rotate_90_proba = rotate_90_proba
|
409 |
|
410 |
def __len__(self):
|
411 |
+
"""
|
412 |
+
Return the number of annotations in the dataset.
|
413 |
+
|
414 |
+
Returns:
|
415 |
+
- int: The number of annotations.
|
416 |
+
"""
|
417 |
return len(self.annotations)
|
418 |
|
419 |
def __getitem__(self, idx):
|
420 |
+
"""
|
421 |
+
Get an item (image and target) from the dataset at the specified index.
|
422 |
+
|
423 |
+
Parameters:
|
424 |
+
- idx (int): The index of the item to retrieve.
|
425 |
+
|
426 |
+
Returns:
|
427 |
+
- PIL Image, dict: The transformed image and its updated target dictionary.
|
428 |
+
"""
|
429 |
annotation = self.annotations[idx]
|
430 |
image = annotation.img.convert("RGB")
|
431 |
boxes = torch.tensor(np.array(annotation.boxes_ltrb), dtype=torch.float32)
|
432 |
labels_names = [ann for ann in annotation.categories]
|
433 |
|
434 |
+
# Only keep the labels, boxes, and keypoints that are in the class_dict
|
435 |
kept_indices = [i for i, ann in enumerate(annotation.categories) if ann in self.dict.values()]
|
436 |
boxes = boxes[kept_indices]
|
437 |
labels_names = [ann for i, ann in enumerate(labels_names) if i in kept_indices]
|
|
|
452 |
if ann.category in ["sequenceFlow", "messageFlow", "dataAssociation"]:
|
453 |
# Fill the keypoints tensor for this annotation, mark as visible (1)
|
454 |
kp = np.array(ann.keypoints, dtype=np.float32).reshape(-1, 3)
|
455 |
+
kp = kp[:, :2]
|
456 |
visible = np.ones((kp.shape[0], 1), dtype=np.float32)
|
457 |
kp = np.hstack([kp, visible])
|
458 |
keypoints[ii, :kp.shape[0], :] = torch.tensor(kp, dtype=torch.float32)
|
|
|
460 |
|
461 |
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
|
462 |
|
463 |
+
if self.model_type == 'object':
|
464 |
target = {
|
465 |
"boxes": boxes,
|
466 |
"labels": labels_id,
|
467 |
+
# "area": area,
|
468 |
}
|
469 |
elif self.model_type == 'arrow':
|
470 |
target = {
|
471 |
"boxes": boxes,
|
472 |
"labels": labels_id,
|
473 |
+
# "area": area,
|
474 |
"keypoints": keypoints,
|
475 |
}
|
476 |
|
|
|
485 |
# Randomly apply the custom cropping transform
|
486 |
if self.crop_transform and random.random() < self.crop_prob:
|
487 |
image, target = self.crop_transform(image, target)
|
488 |
+
|
489 |
# Rotate vertical image
|
490 |
if random.random() < self.rotate_90_proba:
|
491 |
image, target = rotate_vertical(image, target)
|
|
|
495 |
# Center and pad the image while keeping the aspect ratio
|
496 |
image, target = resize_and_pad(image, target, self.new_size)
|
497 |
else:
|
498 |
+
target['boxes'] = resize_boxes(target['boxes'], (image.size[0], image.size[1]), self.new_size)
|
499 |
if 'area' in target:
|
500 |
target['area'] = (target['boxes'][:, 3] - target['boxes'][:, 1]) * (target['boxes'][:, 2] - target['boxes'][:, 0])
|
501 |
if 'keypoints' in target:
|
502 |
for i in range(len(target['keypoints'])):
|
503 |
+
target['keypoints'][i] = resize_keypoints(target['keypoints'][i], (image.size[0], image.size[1]), self.new_size)
|
504 |
image = F.resize(image, (self.new_size[1], self.new_size[0]))
|
505 |
|
506 |
return self.transform(image), target
|
|
|
530 |
return images, targets
|
531 |
|
532 |
|
533 |
+
def create_loader(new_size, transformation, annotations1, annotations2=None,
|
|
|
534 |
batch_size=4, crop_prob=0.2, crop_fraction=0.7, min_objects=3,
|
535 |
h_flip_prob=0.3, v_flip_prob=0.3, max_rotate_deg=20, rotate_90_proba=0.2, rotate_proba=0.3,
|
536 |
+
seed=42, resize=True, keep_ratio=0.1, model_type='object'):
|
537 |
"""
|
538 |
+
Create a DataLoader for BPMN datasets with optional transformations and concatenation of two datasets.
|
539 |
|
540 |
Parameters:
|
541 |
+
- new_size (tuple): The target size for the images.
|
542 |
- transformation (callable): Transformation function to apply to each image (e.g., normalization).
|
543 |
- annotations1 (list): Primary list of annotations.
|
544 |
- annotations2 (list, optional): Secondary list of annotations to concatenate with the first.
|
|
|
548 |
- min_objects (int): Minimum number of objects required to be within the crop.
|
549 |
- h_flip_prob (float): Probability of applying horizontal flip.
|
550 |
- v_flip_prob (float): Probability of applying vertical flip.
|
551 |
+
- max_rotate_deg (int): Maximum degree to rotate the image.
|
552 |
+
- rotate_90_proba (float): Probability of rotating the image by 90 degrees.
|
553 |
+
- rotate_proba (float): Probability of applying rotation to the image.
|
554 |
- seed (int): Seed for random number generators for reproducibility.
|
555 |
- resize (bool): Flag indicating whether to resize images after transformations.
|
556 |
+
- keep_ratio (float): Probability of keeping the aspect ratio during resizing.
|
557 |
+
- model_type (str): Type of model ('object' or 'arrow') to determine the target dictionary.
|
558 |
|
559 |
Returns:
|
560 |
- DataLoader: Configured data loader for the dataset.
|
561 |
"""
|
562 |
|
563 |
# Initialize custom transformations for cropping and flipping
|
564 |
+
custom_crop_transform = RandomCrop(new_size, crop_fraction, min_objects)
|
565 |
custom_flip_transform = RandomFlip(h_flip_prob, v_flip_prob)
|
566 |
custom_rotate_transform = RandomRotate(max_rotate_deg, rotate_proba)
|
567 |
|
|
|
603 |
# Create the DataLoader with the dataset
|
604 |
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
|
605 |
|
606 |
+
return data_loader
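A minimal usage sketch of the loader factory above; the annotation list and the normalization transform are placeholders, not values taken from this repository:

import torchvision.transforms as T

train_annotations = []  # replace with the annotation list the dataset expects (format assumed)
transform = T.Compose([T.ToTensor()])

loader = create_loader(
    new_size=(1333, 1333),
    transformation=transform,
    annotations1=train_annotations,
    batch_size=4,
    crop_prob=0.2,
    rotate_90_proba=0.2,
    model_type='arrow',
)
for images, targets in loader:
    break  # each batch is a tuple of image tensors and target dicts assembled by collate_fn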
|
modules/eval.py
CHANGED
@@ -9,6 +9,18 @@ from builtins import dict
|
|
9 |
|
10 |
|
11 |
def non_maximum_suppression(boxes, scores, labels=None, iou_threshold=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
exception = ['pool', 'lane']
|
13 |
|
14 |
idxs = np.argsort(scores) # Sort the boxes according to their scores in ascending order
|
@@ -40,6 +52,19 @@ def non_maximum_suppression(boxes, scores, labels=None, iou_threshold=0.5):
|
|
40 |
|
41 |
|
42 |
def keypoint_correction(keypoints, boxes, labels, model_dict=arrow_dict, distance_treshold=15):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
for idx, (key1, key2) in enumerate(keypoints):
|
44 |
if labels[idx] not in [list(model_dict.values()).index('sequenceFlow'),
|
45 |
list(model_dict.values()).index('messageFlow'),
|
@@ -49,14 +74,26 @@ def keypoint_correction(keypoints, boxes, labels, model_dict=arrow_dict, distanc
|
|
49 |
distance = np.linalg.norm(key1[:2] - key2[:2])
|
50 |
if distance < distance_treshold:
|
51 |
print('Key modified for index:', idx)
|
52 |
-
x_new,y_new, x,y = find_other_keypoint(idx, keypoints, boxes)
|
53 |
-
keypoints[idx][0][:2] = [x_new,y_new]
|
54 |
-
keypoints[idx][1][:2] = [x,y]
|
55 |
|
56 |
return keypoints
|
57 |
|
58 |
|
59 |
def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
model.eval()
|
61 |
with torch.no_grad():
|
62 |
image_tensor = image.unsqueeze(0).to(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
|
@@ -73,7 +110,7 @@ def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
|
|
73 |
|
74 |
selected_boxes = non_maximum_suppression(boxes, scores, labels=labels, iou_threshold=iou_threshold)
|
75 |
|
76 |
-
#
|
77 |
vertical = 0
|
78 |
for i in range(len(labels)):
|
79 |
if labels[i] != list(object_dict.values()).index('task'):
|
@@ -87,12 +124,12 @@ def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
|
|
87 |
|
88 |
if vertical < horizontal:
|
89 |
if is_vertical(boxes[i]):
|
90 |
-
#
|
91 |
if i in selected_boxes:
|
92 |
selected_boxes.remove(i)
|
93 |
elif vertical > horizontal:
|
94 |
if is_vertical(boxes[i]) == False:
|
95 |
-
#
|
96 |
if i in selected_boxes:
|
97 |
selected_boxes.remove(i)
|
98 |
else:
|
@@ -102,23 +139,21 @@ def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
|
|
102 |
scores = scores[selected_boxes]
|
103 |
labels = labels[selected_boxes]
|
104 |
|
105 |
-
#
|
106 |
-
obj_not_too_small = find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, element_ref=['event', 'messageEvent'], mode="lower")
|
107 |
-
obj_not_too_big = find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=2, element_ref=['task'], mode="upper")
|
108 |
|
109 |
selected_object = [i for i in range(len(labels)) if i in obj_not_too_small and i in obj_not_too_big]
|
110 |
|
111 |
-
#selected_object = obj_not_too_small
|
112 |
-
|
113 |
boxes = boxes[selected_object]
|
114 |
scores = scores[selected_object]
|
115 |
labels = labels[selected_object]
|
116 |
|
117 |
-
#
|
118 |
for i in range(len(labels)):
|
119 |
if labels[i] == list(object_dict.values()).index('subProcess'):
|
120 |
labels[i] = list(object_dict.values()).index('task')
|
121 |
-
#
|
122 |
lane_index = [i for i in range(len(labels)) if labels[i] == list(object_dict.values()).index('lane')]
|
123 |
boxes = np.delete(boxes, lane_index, axis=0)
|
124 |
labels = np.delete(labels, lane_index)
|
@@ -137,6 +172,19 @@ def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
|
|
137 |
|
138 |
|
139 |
def arrow_prediction(model, image, score_threshold=0.5, iou_threshold=0.5, distance_treshold=15):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
model.eval()
|
141 |
with torch.no_grad():
|
142 |
image_tensor = image.unsqueeze(0).to(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
|
@@ -173,7 +221,18 @@ def arrow_prediction(model, image, score_threshold=0.5, iou_threshold=0.5, dista
|
|
173 |
|
174 |
return image, prediction
|
175 |
|
|
|
176 |
def mix_predictions(objects_pred, arrow_pred):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
# Initialize the list of lists for keypoints
|
178 |
object_keypoints = []
|
179 |
|
@@ -186,7 +245,7 @@ def mix_predictions(objects_pred, arrow_pred):
|
|
186 |
keypoints = [[0, 0, 0], [0, 0, 0]]
|
187 |
object_keypoints.append(keypoints)
|
188 |
|
189 |
-
#
|
190 |
if len(arrow_pred['boxes']) == 0:
|
191 |
return objects_pred['boxes'], objects_pred['labels'], objects_pred['scores'], object_keypoints
|
192 |
|
@@ -199,6 +258,21 @@ def mix_predictions(objects_pred, arrow_pred):
|
|
199 |
|
200 |
|
201 |
def regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict, iou_threshold=0.6):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
pool_dict = {}
|
203 |
|
204 |
# Filter out pools with IoU greater than the threshold
|
@@ -265,12 +339,24 @@ def regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict, iou_t
|
|
265 |
return pool_dict, boxes, labels, scores, keypoints
|
266 |
|
267 |
|
268 |
-
|
269 |
def create_links(keypoints, boxes, labels, class_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
best_points = []
|
271 |
links = []
|
272 |
for i in range(len(labels)):
|
273 |
-
if labels[i]==list(class_dict.values()).index('sequenceFlow') or labels[i]==list(class_dict.values()).index('messageFlow'):
|
274 |
closest1, point_start = find_closest_object(keypoints[i][0], boxes, labels)
|
275 |
closest2, point_end = find_closest_object(keypoints[i][1], boxes, labels)
|
276 |
|
@@ -278,11 +364,11 @@ def create_links(keypoints, boxes, labels, class_dict):
|
|
278 |
best_points.append([point_start, point_end])
|
279 |
links.append([closest1, closest2])
|
280 |
else:
|
281 |
-
best_points.append([None,None])
|
282 |
-
links.append([None,None])
|
283 |
|
284 |
for i in range(len(labels)):
|
285 |
-
if labels[i]==list(class_dict.values()).index('dataAssociation'):
|
286 |
closest1, point_start = find_closest_object(keypoints[i][0], boxes, labels)
|
287 |
closest2, point_end = find_closest_object(keypoints[i][1], boxes, labels)
|
288 |
if closest1 is not None and closest2 is not None:
|
@@ -291,7 +377,22 @@ def create_links(keypoints, boxes, labels, class_dict):
|
|
291 |
|
292 |
return links, best_points
|
293 |
|
|
|
294 |
def correction_labels(boxes, labels, class_dict, pool_dict, flow_links):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
sequence_flow_index = list(class_dict.values()).index('sequenceFlow')
|
296 |
message_flow_index = list(class_dict.values()).index('messageFlow')
|
297 |
data_association_index = list(class_dict.values()).index('dataAssociation')
|
@@ -339,7 +440,21 @@ def correction_labels(boxes, labels, class_dict, pool_dict, flow_links):
|
|
339 |
return labels, flow_links
|
340 |
|
341 |
|
342 |
-
def find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, element_ref=['event', 'messageEvent'], mode="lower"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
# Filter out the sizes of events, data objects, and message events
|
344 |
event_indices = [i for i, label in enumerate(labels) if class_dict[label] in element_ref]
|
345 |
event_boxes = [boxes[i] for i in event_indices]
|
@@ -360,7 +475,7 @@ def find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, elem
|
|
360 |
kept_indices = []
|
361 |
|
362 |
if mode == "lower" or mode == 'both':
|
363 |
-
#
|
364 |
for idx, (box, label) in enumerate(zip(boxes, labels)):
|
365 |
area = (box[2] - box[0]) * (box[3] - box[1])
|
366 |
if not (area_lower_threshold <= area):
|
@@ -370,7 +485,7 @@ def find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, elem
|
|
370 |
kept_indices.append(idx)
|
371 |
|
372 |
if mode == "upper" or mode == 'both':
|
373 |
-
#
|
374 |
for idx, (box, label) in enumerate(zip(boxes, labels)):
|
375 |
if label == list(class_dict.values()).index('pool') or label == list(class_dict.values()).index('lane'):
|
376 |
kept_indices.append(idx)
|
@@ -382,17 +497,31 @@ def find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, elem
|
|
382 |
else:
|
383 |
kept_indices.append(idx)
|
384 |
|
385 |
-
|
386 |
return kept_indices
|
387 |
|
388 |
|
389 |
-
|
390 |
def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict, limit_area=10000):
|
391 |
-
|
392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
delete_pool = []
|
394 |
for pool_index, elements in pool_dict.items():
|
395 |
-
#
|
396 |
if pool_index in bpmn_id:
|
397 |
position = bpmn_id.index(pool_index)
|
398 |
else:
|
@@ -405,11 +534,11 @@ def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_point
|
|
405 |
delete_pool.append(position)
|
406 |
print(f"Pool {pool_index} contains only arrow elements, deleting it")
|
407 |
|
408 |
-
#
|
409 |
if position < len(boxes):
|
410 |
pool = boxes[position]
|
411 |
area = (pool[2] - pool[0]) * (pool[3] - pool[1])
|
412 |
-
if len(pool_dict)>1 and area < limit_area:
|
413 |
delete_pool.append(position)
|
414 |
print(f"Pool {pool_index} is too small, deleting it")
|
415 |
|
@@ -417,34 +546,23 @@ def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_point
|
|
417 |
delete_pool.append(position)
|
418 |
print(f"Pool {position} is vertical, deleting it")
|
419 |
|
420 |
-
|
421 |
delete_elements = []
|
422 |
# Check if there is an arrow that has the same links
|
423 |
for i in range(len(labels)):
|
424 |
-
for j in range(i+1, len(labels)):
|
425 |
if labels[i] == list(class_dict.values()).index('sequenceFlow') and labels[j] == list(class_dict.values()).index('sequenceFlow'):
|
426 |
if links[i] == links[j]:
|
427 |
-
print(f'element {i} and {j} have the same links')
|
428 |
if scores[i] > scores[j]:
|
429 |
-
print('delete element', j)
|
430 |
delete_elements.append(j)
|
431 |
else:
|
432 |
-
print('delete element', i)
|
433 |
delete_elements.append(i)
|
434 |
|
435 |
-
#
|
436 |
-
"""tex_pred = st.session_state.text_pred
|
437 |
-
for i in range(len(boxes)):
|
438 |
-
for j in range(len(tex_pred[0])):
|
439 |
-
#check if the box is inside the text box but if the text box is inside the box then it is not a problem
|
440 |
-
if proportion_inside(boxes[i], tex_pred[0][j]) > 0.1:
|
441 |
-
#delete_elements.append(i)
|
442 |
-
print('delete element', i)"""
|
443 |
-
|
444 |
-
|
445 |
-
#concatenate the delete_elements and the delete_pool
|
446 |
delete_elements = delete_elements + delete_pool
|
447 |
-
#
|
448 |
delete_elements = list(set(delete_elements))
|
449 |
|
450 |
boxes = np.delete(boxes, delete_elements, axis=0)
|
@@ -456,74 +574,129 @@ def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_point
|
|
456 |
best_points = [point for i, point in enumerate(best_points) if i not in delete_elements]
|
457 |
|
458 |
for i in range(len(delete_pool)):
|
459 |
-
#
|
460 |
pool_index = bpmn_id[delete_pool[i]]
|
461 |
-
#
|
462 |
del pool_dict[pool_index]
|
463 |
|
464 |
bpmn_id = [point for i, point in enumerate(bpmn_id) if i not in delete_elements]
|
465 |
|
466 |
-
#
|
467 |
for pool_index, elements in pool_dict.items():
|
468 |
pool_dict[pool_index] = [i for i in elements if i not in delete_elements]
|
469 |
|
470 |
return boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict
|
471 |
|
|
|
472 |
def give_link_to_element(links, labels):
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
|
482 |
|
483 |
def generate_data(image, boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict):
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
|
505 |
-
def develop_prediction(boxes, labels, scores, keypoints, class_dict):
|
506 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
pool_dict, boxes, labels, scores, keypoints = regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict)
|
508 |
|
509 |
-
bpmn_id, pool_dict = create_BPMN_id(labels,pool_dict)
|
510 |
|
511 |
# Create links between elements
|
512 |
flow_links, best_points = create_links(keypoints, boxes, labels, class_dict)
|
513 |
|
514 |
-
#Correct the labels of some
|
515 |
labels, flow_links = correction_labels(boxes, labels, class_dict, pool_dict, flow_links)
|
516 |
|
517 |
-
#
|
518 |
flow_links = give_link_to_element(flow_links, labels)
|
519 |
|
520 |
-
boxes,labels,scores,keypoints,bpmn_id, flow_links,best_points,pool_dict = last_correction(boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict)
|
|
|
|
|
521 |
|
522 |
-
return boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict
|
523 |
|
524 |
-
|
525 |
|
526 |
def full_prediction(model_object, model_arrow, image, score_threshold=0.5, iou_threshold=0.5, resize=True, distance_treshold=15):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
527 |
model_object.eval() # Set the model to evaluation mode
|
528 |
model_arrow.eval() # Set the model to evaluation mode
|
529 |
|
@@ -536,7 +709,9 @@ def full_prediction(model_object, model_arrow, image, score_threshold=0.5, iou_t
|
|
536 |
|
537 |
boxes, labels, scores, keypoints = mix_predictions(objects_pred, arrow_pred)
|
538 |
|
539 |
-
boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict = develop_prediction(boxes, labels, scores, keypoints, class_dict)
|
|
|
|
|
540 |
|
541 |
image = image.permute(1, 2, 0).cpu().numpy()
|
542 |
image = (image * 255).astype(np.uint8)
|
@@ -545,7 +720,22 @@ def full_prediction(model_object, model_arrow, image, score_threshold=0.5, iou_t
|
|
545 |
|
546 |
return image, data
|
547 |
|
|
|
548 |
def evaluate_model_by_class(pred_boxes, true_boxes, pred_labels, true_labels, model_dict, iou_threshold=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
549 |
# Initialize dictionaries to hold per-class counts
|
550 |
class_tp = {cls: 0 for cls in model_dict.values()}
|
551 |
class_fp = {cls: 0 for cls in model_dict.values()}
|
@@ -589,10 +779,25 @@ def evaluate_model_by_class(pred_boxes, true_boxes, pred_labels, true_labels, mo
|
|
589 |
return class_precision, class_recall, class_f1_score
|
590 |
|
591 |
|
592 |
-
def keypoints_mesure(pred_boxes, pred_box, true_boxes, true_box, pred_keypoints, true_keypoints, distance_threshold=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
593 |
result = 0
|
594 |
reverted = False
|
595 |
-
#
|
596 |
idx = np.where(pred_boxes == pred_box)[0][0]
|
597 |
idx2 = np.where(true_boxes == true_box)[0][0]
|
598 |
|
@@ -615,7 +820,24 @@ def keypoints_mesure(pred_boxes, pred_box, true_boxes, true_box, pred_keypoints,
|
|
615 |
|
616 |
return result, reverted
|
617 |
|
|
|
618 |
def evaluate_single_image(pred_boxes, true_boxes, pred_labels, true_labels, pred_keypoints, true_keypoints, iou_threshold=0.5, distance_threshold=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
619 |
tp, fp, fn = 0, 0, 0
|
620 |
key_t, key_f = 0, 0
|
621 |
labels_t, labels_f = 0, 0
|
@@ -630,7 +852,9 @@ def evaluate_single_image(pred_boxes, true_boxes, pred_labels, true_labels, pred
|
|
630 |
iou_val = iou(pred_box, true_box)
|
631 |
if iou_val >= iou_threshold:
|
632 |
if true_keypoints is not None and pred_keypoints is not None:
|
633 |
-
key_result, reverted = keypoints_mesure(pred_boxes, pred_box, true_boxes, true_box, pred_keypoints, true_keypoints, distance_threshold)
|
|
|
|
|
634 |
key_t += key_result
|
635 |
key_f += 2 - key_result
|
636 |
if reverted:
|
@@ -653,6 +877,21 @@ def evaluate_single_image(pred_boxes, true_boxes, pred_labels, true_labels, pred
|
|
653 |
|
654 |
|
655 |
def pred_4_evaluation(model, loader, score_threshold=0.5, iou_threshold=0.5, distance_threshold=5, key_correction=True, model_type='object'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
656 |
model.eval()
|
657 |
tp, fp, fn = 0, 0, 0
|
658 |
labels_t, labels_f = 0, 0
|
@@ -690,7 +929,7 @@ def pred_4_evaluation(model, loader, score_threshold=0.5, iou_threshold=0.5, dis
|
|
690 |
filtered_labels = []
|
691 |
filtered_keypoints = []
|
692 |
if 'keypoints' not in prediction:
|
693 |
-
#
|
694 |
pred_keypoints = [np.zeros((2, 3)) for _ in range(len(pred_boxes))]
|
695 |
|
696 |
for box, score, label, keypoints in zip(pred_boxes, scores, pred_labels, pred_keypoints):
|
@@ -707,7 +946,8 @@ def pred_4_evaluation(model, loader, score_threshold=0.5, iou_threshold=0.5, dis
|
|
707 |
filtered_keypoints = None
|
708 |
true_keypoints = None
|
709 |
tp_img, fp_img, fn_img, labels_t_img, labels_f_img, key_t_img, key_f_img, reverted_img = evaluate_single_image(
|
710 |
-
filtered_boxes, true_boxes, filtered_labels, true_labels, filtered_keypoints, true_keypoints, iou_threshold, distance_threshold)
|
|
|
711 |
|
712 |
tp += tp_img
|
713 |
fp += fp_img
|
@@ -720,9 +960,26 @@ def pred_4_evaluation(model, loader, score_threshold=0.5, iou_threshold=0.5, dis
|
|
720 |
|
721 |
return tp, fp, fn, labels_t, labels_f, key_t, key_f, reverted
|
722 |
|
723 |
-
def main_evaluation(model, test_loader, score_threshold=0.5, iou_threshold=0.5, distance_threshold=5, key_correction=True, model_type = 'object'):
|
724 |
|
725 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
726 |
|
727 |
labels_precision = labels_t / (labels_t + labels_f) if (labels_t + labels_f) > 0 else 0
|
728 |
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
|
@@ -738,8 +995,21 @@ def main_evaluation(model, test_loader, score_threshold=0.5, iou_threshold=0.5,
|
|
738 |
return labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy
|
739 |
|
740 |
|
741 |
-
|
742 |
def evaluate_model_by_class_single_image(pred_boxes, true_boxes, pred_labels, true_labels, class_tp, class_fp, class_fn, model_dict, iou_threshold=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
743 |
matched_true_boxes = set()
|
744 |
for pred_idx, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
|
745 |
match_found = False
|
@@ -758,7 +1028,20 @@ def evaluate_model_by_class_single_image(pred_boxes, true_boxes, pred_labels, tr
|
|
758 |
if idx not in matched_true_boxes:
|
759 |
class_fn[model_dict[true_label]] += 1
|
760 |
|
|
|
761 |
def pred_4_evaluation_per_class(model, loader, score_threshold=0.5, iou_threshold=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
762 |
model.eval()
|
763 |
with torch.no_grad():
|
764 |
for images, targets_im in tqdm(loader, desc="Testing... "):
|
@@ -788,7 +1071,21 @@ def pred_4_evaluation_per_class(model, loader, score_threshold=0.5, iou_threshol
|
|
788 |
|
789 |
yield pred_boxes, true_boxes, pred_labels, true_labels
|
790 |
|
|
|
791 |
def evaluate_model_by_class(model, test_loader, model_dict, score_threshold=0.5, iou_threshold=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
792 |
class_tp = {cls: 0 for cls in model_dict.values()}
|
793 |
class_fp = {cls: 0 for cls in model_dict.values()}
|
794 |
class_fn = {cls: 0 for cls in model_dict.values()}
|
@@ -809,4 +1106,4 @@ def evaluate_model_by_class(model, test_loader, model_dict, score_threshold=0.5,
|
|
809 |
class_recall[cls] = recall
|
810 |
class_f1_score[cls] = f1_score
|
811 |
|
812 |
-
return class_precision, class_recall, class_f1_score
|
|
|
9 |
|
10 |
|
11 |
def non_maximum_suppression(boxes, scores, labels=None, iou_threshold=0.5):
|
12 |
+
"""
|
13 |
+
Perform non-maximum suppression to filter out overlapping bounding boxes.
|
14 |
+
|
15 |
+
Parameters:
|
16 |
+
- boxes (array): Array of bounding boxes.
|
17 |
+
- scores (array): Array of confidence scores for each bounding box.
|
18 |
+
- labels (array, optional): Array of labels for each bounding box.
|
19 |
+
- iou_threshold (float): Intersection-over-Union threshold to use for filtering.
|
20 |
+
|
21 |
+
Returns:
|
22 |
+
- list: Indices of selected boxes after suppression.
|
23 |
+
"""
|
24 |
exception = ['pool', 'lane']
|
25 |
|
26 |
idxs = np.argsort(scores) # Sort the boxes according to their scores in ascending order
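For context, a self-contained sketch of the intersection-over-union test that suppression relies on; this is the standard formula and may differ in detail from the repository's own iou helper:

import numpy as np

def iou_example(box_a, box_b):
    # Boxes are [x1, y1, x2, y2]
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)

# Two heavily overlapping boxes give an IoU above 0.5, so the lower-scoring one is dropped
print(iou_example([0, 0, 100, 100], [10, 10, 110, 110]))  # ~0.68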
|
|
|
52 |
|
53 |
|
54 |
def keypoint_correction(keypoints, boxes, labels, model_dict=arrow_dict, distance_treshold=15):
|
55 |
+
"""
|
56 |
+
Correct keypoints that are too close together by adjusting their positions.
|
57 |
+
|
58 |
+
Parameters:
|
59 |
+
- keypoints (array): Array of keypoints.
|
60 |
+
- boxes (array): Array of bounding boxes.
|
61 |
+
- labels (array): Array of labels for each bounding box.
|
62 |
+
- model_dict (dict): Dictionary mapping model labels to indices.
|
63 |
+
- distance_treshold (int): Distance threshold below which keypoints are considered too close.
|
64 |
+
|
65 |
+
Returns:
|
66 |
+
- array: Corrected keypoints.
|
67 |
+
"""
|
68 |
for idx, (key1, key2) in enumerate(keypoints):
|
69 |
if labels[idx] not in [list(model_dict.values()).index('sequenceFlow'),
|
70 |
list(model_dict.values()).index('messageFlow'),
|
|
|
74 |
distance = np.linalg.norm(key1[:2] - key2[:2])
|
75 |
if distance < distance_treshold:
|
76 |
print('Key modified for index:', idx)
|
77 |
+
x_new, y_new, x, y = find_other_keypoint(idx, keypoints, boxes)
|
78 |
+
keypoints[idx][0][:2] = [x_new, y_new]
|
79 |
+
keypoints[idx][1][:2] = [x, y]
|
80 |
|
81 |
return keypoints
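A small illustration of the distance test used above, with arbitrary endpoint values:

import numpy as np

key1 = np.array([105.0, 200.0, 1.0])  # [x, y, visibility]
key2 = np.array([112.0, 206.0, 1.0])
distance = np.linalg.norm(key1[:2] - key2[:2])  # ~9.2
# With distance_treshold=15 this pair would be flagged and one endpoint re-estimated
print(distance < 15)  # True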
|
82 |
|
83 |
|
84 |
def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
|
85 |
+
"""
|
86 |
+
Perform object detection prediction using the model.
|
87 |
+
|
88 |
+
Parameters:
|
89 |
+
- model (torch.nn.Module): The object detection model.
|
90 |
+
- image (torch.Tensor): The input image.
|
91 |
+
- score_threshold (float): Score threshold for filtering predictions.
|
92 |
+
- iou_threshold (float): IoU threshold for non-maximum suppression.
|
93 |
+
|
94 |
+
Returns:
|
95 |
+
- numpy.array, dict: The processed image and the prediction dictionary containing 'boxes', 'scores', and 'labels'.
|
96 |
+
"""
|
97 |
model.eval()
|
98 |
with torch.no_grad():
|
99 |
image_tensor = image.unsqueeze(0).to(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
|
|
|
110 |
|
111 |
selected_boxes = non_maximum_suppression(boxes, scores, labels=labels, iou_threshold=iou_threshold)
|
112 |
|
113 |
+
# Find orientation of the task by checking the size of all the boxes and delete the ones that are not in the same orientation
|
114 |
vertical = 0
|
115 |
for i in range(len(labels)):
|
116 |
if labels[i] != list(object_dict.values()).index('task'):
|
|
|
124 |
|
125 |
if vertical < horizontal:
|
126 |
if is_vertical(boxes[i]):
|
127 |
+
# Find the element in the list and remove it
|
128 |
if i in selected_boxes:
|
129 |
selected_boxes.remove(i)
|
130 |
elif vertical > horizontal:
|
131 |
if is_vertical(boxes[i]) == False:
|
132 |
+
# Find the element in the list and remove it
|
133 |
if i in selected_boxes:
|
134 |
selected_boxes.remove(i)
|
135 |
else:
|
|
|
139 |
scores = scores[selected_boxes]
|
140 |
labels = labels[selected_boxes]
|
141 |
|
142 |
+
# Find the outlier objects that are too small by the area
|
143 |
+
obj_not_too_small = find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, element_ref=['event', 'messageEvent'], mode="lower")
|
144 |
+
obj_not_too_big = find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=2, element_ref=['task'], mode="upper")
|
145 |
|
146 |
selected_object = [i for i in range(len(labels)) if i in obj_not_too_small and i in obj_not_too_big]
|
147 |
|
|
|
|
|
148 |
boxes = boxes[selected_object]
|
149 |
scores = scores[selected_object]
|
150 |
labels = labels[selected_object]
|
151 |
|
152 |
+
# Modify the label of the sub-process to task
|
153 |
for i in range(len(labels)):
|
154 |
if labels[i] == list(object_dict.values()).index('subProcess'):
|
155 |
labels[i] = list(object_dict.values()).index('task')
|
156 |
+
# Delete all lane and also the value in the labels and scores
|
157 |
lane_index = [i for i in range(len(labels)) if labels[i] == list(object_dict.values()).index('lane')]
|
158 |
boxes = np.delete(boxes, lane_index, axis=0)
|
159 |
labels = np.delete(labels, lane_index)
|
|
|
172 |
|
173 |
|
174 |
def arrow_prediction(model, image, score_threshold=0.5, iou_threshold=0.5, distance_treshold=15):
|
175 |
+
"""
|
176 |
+
Perform arrow detection prediction using the model.
|
177 |
+
|
178 |
+
Parameters:
|
179 |
+
- model (torch.nn.Module): The arrow detection model.
|
180 |
+
- image (torch.Tensor): The input image.
|
181 |
+
- score_threshold (float): Score threshold for filtering predictions.
|
182 |
+
- iou_threshold (float): IoU threshold for non-maximum suppression.
|
183 |
+
- distance_treshold (int): Distance threshold for keypoint correction.
|
184 |
+
|
185 |
+
Returns:
|
186 |
+
- numpy.array, dict: The processed image and the prediction dictionary containing 'boxes', 'scores', 'labels', and 'keypoints'.
|
187 |
+
"""
|
188 |
model.eval()
|
189 |
with torch.no_grad():
|
190 |
image_tensor = image.unsqueeze(0).to(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
|
|
|
221 |
|
222 |
return image, prediction
|
223 |
|
224 |
+
|
225 |
def mix_predictions(objects_pred, arrow_pred):
|
226 |
+
"""
|
227 |
+
Combine object and arrow predictions into a single set of predictions.
|
228 |
+
|
229 |
+
Parameters:
|
230 |
+
- objects_pred (dict): Object predictions dictionary.
|
231 |
+
- arrow_pred (dict): Arrow predictions dictionary.
|
232 |
+
|
233 |
+
Returns:
|
234 |
+
- tuple: Combined boxes, labels, scores, and keypoints.
|
235 |
+
"""
|
236 |
# Initialize the list of lists for keypoints
|
237 |
object_keypoints = []
|
238 |
|
|
|
245 |
keypoints = [[0, 0, 0], [0, 0, 0]]
|
246 |
object_keypoints.append(keypoints)
|
247 |
|
248 |
+
# Concatenate the two predictions
|
249 |
if len(arrow_pred['boxes']) == 0:
|
250 |
return objects_pred['boxes'], objects_pred['labels'], objects_pred['scores'], object_keypoints
|
251 |
|
|
|
258 |
|
259 |
|
260 |
def regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict, iou_threshold=0.6):
|
261 |
+
"""
|
262 |
+
Regroup elements by pool based on IoU and proximity.
|
263 |
+
|
264 |
+
Parameters:
|
265 |
+
- boxes (array): Array of bounding boxes.
|
266 |
+
- labels (array): Array of labels for each bounding box.
|
267 |
+
- scores (array): Array of confidence scores for each bounding box.
|
268 |
+
- keypoints (array): Array of keypoints.
|
269 |
+
- class_dict (dict): Dictionary mapping class names to indices.
|
270 |
+
- iou_threshold (float): IoU threshold for grouping.
|
271 |
+
|
272 |
+
Returns:
|
273 |
+
- dict: Dictionary grouping elements by pool.
|
274 |
+
- array: Updated arrays of boxes, labels, scores, and keypoints.
|
275 |
+
"""
|
276 |
pool_dict = {}
|
277 |
|
278 |
# Filter out pools with IoU greater than the threshold
|
|
|
339 |
return pool_dict, boxes, labels, scores, keypoints
|
340 |
|
341 |
|
|
|
342 |
def create_links(keypoints, boxes, labels, class_dict):
|
343 |
+
"""
|
344 |
+
Create links between elements based on keypoints.
|
345 |
+
|
346 |
+
Parameters:
|
347 |
+
- keypoints (array): Array of keypoints.
|
348 |
+
- boxes (array): Array of bounding boxes.
|
349 |
+
- labels (array): Array of labels for each bounding box.
|
350 |
+
- class_dict (dict): Dictionary mapping class names to indices.
|
351 |
+
|
352 |
+
Returns:
|
353 |
+
- list: List of links between elements.
|
354 |
+
- list: List of best points for each link.
|
355 |
+
"""
|
356 |
best_points = []
|
357 |
links = []
|
358 |
for i in range(len(labels)):
|
359 |
+
if labels[i] == list(class_dict.values()).index('sequenceFlow') or labels[i] == list(class_dict.values()).index('messageFlow'):
|
360 |
closest1, point_start = find_closest_object(keypoints[i][0], boxes, labels)
|
361 |
closest2, point_end = find_closest_object(keypoints[i][1], boxes, labels)
|
362 |
|
|
|
364 |
best_points.append([point_start, point_end])
|
365 |
links.append([closest1, closest2])
|
366 |
else:
|
367 |
+
best_points.append([None, None])
|
368 |
+
links.append([None, None])
|
369 |
|
370 |
for i in range(len(labels)):
|
371 |
+
if labels[i] == list(class_dict.values()).index('dataAssociation'):
|
372 |
closest1, point_start = find_closest_object(keypoints[i][0], boxes, labels)
|
373 |
closest2, point_end = find_closest_object(keypoints[i][1], boxes, labels)
|
374 |
if closest1 is not None and closest2 is not None:
|
|
|
377 |
|
378 |
return links, best_points
|
379 |
|
380 |
+
|
381 |
def correction_labels(boxes, labels, class_dict, pool_dict, flow_links):
|
382 |
+
"""
|
383 |
+
Correct labels based on the relationships between elements and pools.
|
384 |
+
|
385 |
+
Parameters:
|
386 |
+
- boxes (array): Array of bounding boxes.
|
387 |
+
- labels (array): Array of labels for each bounding box.
|
388 |
+
- class_dict (dict): Dictionary mapping class names to indices.
|
389 |
+
- pool_dict (dict): Dictionary grouping elements by pool.
|
390 |
+
- flow_links (list): List of links between elements.
|
391 |
+
|
392 |
+
Returns:
|
393 |
+
- array: Corrected labels.
|
394 |
+
- list: Updated flow links.
|
395 |
+
"""
|
396 |
sequence_flow_index = list(class_dict.values()).index('sequenceFlow')
|
397 |
message_flow_index = list(class_dict.values()).index('messageFlow')
|
398 |
data_association_index = list(class_dict.values()).index('dataAssociation')
|
|
|
440 |
return labels, flow_links
|
441 |
|
442 |
|
443 |
+
def find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, element_ref=['event', 'messageEvent'], mode="lower"):
|
444 |
+
"""
|
445 |
+
Identify outlier objects based on their area.
|
446 |
+
|
447 |
+
Parameters:
|
448 |
+
- boxes (array): Array of bounding boxes.
|
449 |
+
- labels (array): Array of labels for each bounding box.
|
450 |
+
- class_dict (dict): Dictionary mapping class names to indices.
|
451 |
+
- std_factor (float): Standard deviation factor for determining outliers.
|
452 |
+
- element_ref (list): List of reference elements for calculating area statistics.
|
453 |
+
- mode (str): Mode to identify outliers ('lower', 'upper', or 'both').
|
454 |
+
|
455 |
+
Returns:
|
456 |
+
- list: Indices of kept objects that are not outliers.
|
457 |
+
"""
|
458 |
# Filter out the sizes of events, data objects, and message events
|
459 |
event_indices = [i for i, label in enumerate(labels) if class_dict[label] in element_ref]
|
460 |
event_boxes = [boxes[i] for i in event_indices]
|
|
|
475 |
kept_indices = []
|
476 |
|
477 |
if mode == "lower" or mode == 'both':
|
478 |
+
# Check for objects that could be too small
|
479 |
for idx, (box, label) in enumerate(zip(boxes, labels)):
|
480 |
area = (box[2] - box[0]) * (box[3] - box[1])
|
481 |
if not (area_lower_threshold <= area):
|
|
|
485 |
kept_indices.append(idx)
|
486 |
|
487 |
if mode == "upper" or mode == 'both':
|
488 |
+
# Check for objects that could be too big
|
489 |
for idx, (box, label) in enumerate(zip(boxes, labels)):
|
490 |
if label == list(class_dict.values()).index('pool') or label == list(class_dict.values()).index('lane'):
|
491 |
kept_indices.append(idx)
|
|
|
497 |
else:
|
498 |
kept_indices.append(idx)
|
499 |
|
|
|
500 |
return kept_indices
|
501 |
|
502 |
|
|
|
503 |
def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict, limit_area=10000):
|
504 |
+
"""
|
505 |
+
Perform final corrections on the predictions by deleting irrelevant or small pools and duplicate elements.
|
506 |
+
|
507 |
+
Parameters:
|
508 |
+
- boxes (array): Array of bounding boxes.
|
509 |
+
- labels (array): Array of labels for each bounding box.
|
510 |
+
- scores (array): Array of confidence scores for each bounding box.
|
511 |
+
- keypoints (array): Array of keypoints.
|
512 |
+
- bpmn_id (list): List of BPMN IDs.
|
513 |
+
- links (list): List of links between elements.
|
514 |
+
- best_points (list): List of best points for each link.
|
515 |
+
- pool_dict (dict): Dictionary grouping elements by pool.
|
516 |
+
- limit_area (int): Minimum area threshold for pools.
|
517 |
+
|
518 |
+
Returns:
|
519 |
+
- tuple: Corrected arrays of boxes, labels, scores, keypoints, BPMN IDs, links, best points, and pool dictionary.
|
520 |
+
"""
|
521 |
+
# Delete pools that have only messageFlow on it
|
522 |
delete_pool = []
|
523 |
for pool_index, elements in pool_dict.items():
|
524 |
+
# Find the position of the pool_index in the bpmn_id
|
525 |
if pool_index in bpmn_id:
|
526 |
position = bpmn_id.index(pool_index)
|
527 |
else:
|
|
|
534 |
delete_pool.append(position)
|
535 |
print(f"Pool {pool_index} contains only arrow elements, deleting it")
|
536 |
|
537 |
+
# Calculate the area of the pool
|
538 |
if position < len(boxes):
|
539 |
pool = boxes[position]
|
540 |
area = (pool[2] - pool[0]) * (pool[3] - pool[1])
|
541 |
+
if len(pool_dict) > 1 and area < limit_area:
|
542 |
delete_pool.append(position)
|
543 |
print(f"Pool {pool_index} is too small, deleting it")
|
544 |
|
|
|
546 |
delete_pool.append(position)
|
547 |
print(f"Pool {position} is vertical, deleting it")
|
548 |
|
|
|
549 |
delete_elements = []
|
550 |
# Check if there is an arrow that has the same links
|
551 |
for i in range(len(labels)):
|
552 |
+
for j in range(i + 1, len(labels)):
|
553 |
if labels[i] == list(class_dict.values()).index('sequenceFlow') and labels[j] == list(class_dict.values()).index('sequenceFlow'):
|
554 |
if links[i] == links[j]:
|
555 |
+
print(f'Element {i} and {j} have the same links')
|
556 |
if scores[i] > scores[j]:
|
557 |
+
print('Delete element', j)
|
558 |
delete_elements.append(j)
|
559 |
else:
|
560 |
+
print('Delete element', i)
|
561 |
delete_elements.append(i)
|
562 |
|
563 |
+
# Concatenate the delete_elements and the delete_pool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
564 |
delete_elements = delete_elements + delete_pool
|
565 |
+
# Delete double value in delete_elements
|
566 |
delete_elements = list(set(delete_elements))
|
567 |
|
568 |
boxes = np.delete(boxes, delete_elements, axis=0)
|
|
|
574 |
best_points = [point for i, point in enumerate(best_points) if i not in delete_elements]
|
575 |
|
576 |
for i in range(len(delete_pool)):
|
577 |
+
# Find the bpmn_id of the pool
|
578 |
pool_index = bpmn_id[delete_pool[i]]
|
579 |
+
# Delete the pool_index in pool_dict
|
580 |
del pool_dict[pool_index]
|
581 |
|
582 |
bpmn_id = [point for i, point in enumerate(bpmn_id) if i not in delete_elements]
|
583 |
|
584 |
+
# Also delete the element in the pool_dict
|
585 |
for pool_index, elements in pool_dict.items():
|
586 |
pool_dict[pool_index] = [i for i in elements if i not in delete_elements]
|
587 |
|
588 |
return boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict
|
589 |
|
590 |
+
|
591 |
def give_link_to_element(links, labels):
|
592 |
+
"""
|
593 |
+
Assign links to elements to create BPMN IDs for events.
|
594 |
+
|
595 |
+
Parameters:
|
596 |
+
- links (list): List of links between elements.
|
597 |
+
- labels (array): Array of labels for each bounding box.
|
598 |
+
|
599 |
+
Returns:
|
600 |
+
- list: Updated list of links with assigned links for events.
|
601 |
+
"""
|
602 |
+
# Give a link to event to allow the creation of the BPMN ID with start, intermediate, and end event
|
603 |
+
for i in range(len(links)):
|
604 |
+
if labels[i] == list(class_dict.values()).index('sequenceFlow'):
|
605 |
+
id1, id2 = links[i]
|
606 |
+
if (id1 and id2) is not None:
|
607 |
+
links[id1][1] = i
|
608 |
+
links[id2][0] = i
|
609 |
+
return links
|
610 |
|
611 |
|
612 |
def generate_data(image, boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict):
|
613 |
+
"""
|
614 |
+
Generate a data dictionary containing image and prediction information.
|
615 |
+
|
616 |
+
Parameters:
|
617 |
+
- image (numpy.array): The input image.
|
618 |
+
- boxes (array): Array of bounding boxes.
|
619 |
+
- labels (array): Array of labels for each bounding box.
|
620 |
+
- scores (array): Array of confidence scores for each bounding box.
|
621 |
+
- keypoints (array): Array of keypoints.
|
622 |
+
- bpmn_id (list): List of BPMN IDs.
|
623 |
+
- flow_links (list): List of links between elements.
|
624 |
+
- best_points (list): List of best points for each link.
|
625 |
+
- pool_dict (dict): Dictionary grouping elements by pool.
|
626 |
+
|
627 |
+
Returns:
|
628 |
+
- dict: Data dictionary containing all prediction information.
|
629 |
+
"""
|
630 |
+
idx = []
|
631 |
+
for i in range(len(labels)):
|
632 |
+
idx.append(i)
|
633 |
+
|
634 |
+
data = {
|
635 |
+
'image': image,
|
636 |
+
'idx': idx,
|
637 |
+
'boxes': boxes,
|
638 |
+
'labels': labels,
|
639 |
+
'scores': scores,
|
640 |
+
'keypoints': keypoints,
|
641 |
+
'links': flow_links,
|
642 |
+
'best_points': best_points,
|
643 |
+
'pool_dict': pool_dict,
|
644 |
+
'BPMN_id': bpmn_id,
|
645 |
+
}
|
646 |
+
|
647 |
+
return data
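A sketch of how the returned dictionary can be consumed downstream, assuming the arrays produced earlier in the pipeline are in scope:

data = generate_data(image, boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict)
for i in data['idx']:
    print(data['BPMN_id'][i], data['labels'][i], data['links'][i])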
|
648 |
|
|
|
649 |
|
650 |
+
def develop_prediction(boxes, labels, scores, keypoints, class_dict):
|
651 |
+
"""
|
652 |
+
Develop predictions by regrouping elements, creating links, and correcting labels.
|
653 |
+
|
654 |
+
Parameters:
|
655 |
+
- boxes (array): Array of bounding boxes.
|
656 |
+
- labels (array): Array of labels for each bounding box.
|
657 |
+
- scores (array): Array of confidence scores for each bounding box.
|
658 |
+
- keypoints (array): Array of keypoints.
|
659 |
+
- class_dict (dict): Dictionary mapping class names to indices.
|
660 |
+
|
661 |
+
Returns:
|
662 |
+
- tuple: Developed prediction components including boxes, labels, scores, keypoints, BPMN IDs, flow links, best points, and pool dictionary.
|
663 |
+
"""
|
664 |
pool_dict, boxes, labels, scores, keypoints = regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict)
|
665 |
|
666 |
+
bpmn_id, pool_dict = create_BPMN_id(labels, pool_dict)
|
667 |
|
668 |
# Create links between elements
|
669 |
flow_links, best_points = create_links(keypoints, boxes, labels, class_dict)
|
670 |
|
671 |
+
# Correct the labels of some sequenceFlow that cross multiple pools
|
672 |
labels, flow_links = correction_labels(boxes, labels, class_dict, pool_dict, flow_links)
|
673 |
|
674 |
+
# Give a link to event to allow the creation of the BPMN ID with start, intermediate, and end event
|
675 |
flow_links = give_link_to_element(flow_links, labels)
|
676 |
|
677 |
+
boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict = last_correction(
|
678 |
+
boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict
|
679 |
+
)
|
680 |
|
681 |
+
return boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict
|
682 |
|
|
|
683 |
|
684 |
def full_prediction(model_object, model_arrow, image, score_threshold=0.5, iou_threshold=0.5, resize=True, distance_treshold=15):
|
685 |
+
"""
|
686 |
+
Perform a full prediction by combining object and arrow models and generating data.
|
687 |
+
|
688 |
+
Parameters:
|
689 |
+
- model_object (torch.nn.Module): The object detection model.
|
690 |
+
- model_arrow (torch.nn.Module): The arrow detection model.
|
691 |
+
- image (torch.Tensor): The input image.
|
692 |
+
- score_threshold (float): Score threshold for filtering predictions.
|
693 |
+
- iou_threshold (float): IoU threshold for non-maximum suppression.
|
694 |
+
- resize (bool): Flag indicating whether to resize the image.
|
695 |
+
- distance_treshold (int): Distance threshold for keypoint correction.
|
696 |
+
|
697 |
+
Returns:
|
698 |
+
- numpy.array, dict: The processed image and the data dictionary containing prediction information.
|
699 |
+
"""
|
700 |
model_object.eval() # Set the model to evaluation mode
|
701 |
model_arrow.eval() # Set the model to evaluation mode
|
702 |
|
|
|
709 |
|
710 |
boxes, labels, scores, keypoints = mix_predictions(objects_pred, arrow_pred)
|
711 |
|
712 |
+
boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict = develop_prediction(
|
713 |
+
boxes, labels, scores, keypoints, class_dict
|
714 |
+
)
|
715 |
|
716 |
image = image.permute(1, 2, 0).cpu().numpy()
|
717 |
image = (image * 255).astype(np.uint8)
|
|
|
720 |
|
721 |
return image, data
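A usage sketch mirroring the way the Streamlit front end calls this entry point; the file name and the already-loaded models are placeholders:

from PIL import Image
from torchvision.transforms import functional as F

img_tensor = F.to_tensor(Image.open('diagram.png').convert('RGB'))  # 'diagram.png' is a placeholder
annotated, data = full_prediction(model_object, model_arrow, img_tensor,
                                  score_threshold=0.5, iou_threshold=0.5,
                                  distance_treshold=30)
print(len(data['boxes']), 'elements detected')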
|
722 |
|
723 |
+
|
724 |
def evaluate_model_by_class(pred_boxes, true_boxes, pred_labels, true_labels, model_dict, iou_threshold=0.5):
|
725 |
+
"""
|
726 |
+
Evaluate the model's performance on a per-class basis.
|
727 |
+
|
728 |
+
Parameters:
|
729 |
+
- pred_boxes (array): Predicted bounding boxes.
|
730 |
+
- true_boxes (array): Ground truth bounding boxes.
|
731 |
+
- pred_labels (array): Predicted labels.
|
732 |
+
- true_labels (array): Ground truth labels.
|
733 |
+
- model_dict (dict): Dictionary mapping model labels to indices.
|
734 |
+
- iou_threshold (float): IoU threshold for determining matches.
|
735 |
+
|
736 |
+
Returns:
|
737 |
+
- tuple: Precision, recall, and F1-score per class.
|
738 |
+
"""
|
739 |
# Initialize dictionaries to hold per-class counts
|
740 |
class_tp = {cls: 0 for cls in model_dict.values()}
|
741 |
class_fp = {cls: 0 for cls in model_dict.values()}
|
|
|
779 |
return class_precision, class_recall, class_f1_score
|
780 |
|
781 |
|
782 |
+
def keypoints_measure(pred_boxes, pred_box, true_boxes, true_box, pred_keypoints, true_keypoints, distance_threshold=5):
|
783 |
+
"""
|
784 |
+
Measure the accuracy of predicted keypoints compared to true keypoints.
|
785 |
+
|
786 |
+
Parameters:
|
787 |
+
- pred_boxes (array): Predicted bounding boxes.
|
788 |
+
- pred_box (array): Single predicted bounding box.
|
789 |
+
- true_boxes (array): Ground truth bounding boxes.
|
790 |
+
- true_box (array): Single ground truth bounding box.
|
791 |
+
- pred_keypoints (array): Predicted keypoints.
|
792 |
+
- true_keypoints (array): Ground truth keypoints.
|
793 |
+
- distance_threshold (int): Distance threshold for considering a keypoint match.
|
794 |
+
|
795 |
+
Returns:
|
796 |
+
- tuple: Number of correct keypoints and whether the keypoints are reverted.
|
797 |
+
"""
|
798 |
result = 0
|
799 |
reverted = False
|
800 |
+
# Find the position of keypoints in the list
|
801 |
idx = np.where(pred_boxes == pred_box)[0][0]
|
802 |
idx2 = np.where(true_boxes == true_box)[0][0]
|
803 |
|
|
|
820 |
|
821 |
return result, reverted
|
822 |
|
823 |
+
|
824 |
def evaluate_single_image(pred_boxes, true_boxes, pred_labels, true_labels, pred_keypoints, true_keypoints, iou_threshold=0.5, distance_threshold=5):
|
825 |
+
"""
|
826 |
+
Evaluate a single image's predictions against the ground truth.
|
827 |
+
|
828 |
+
Parameters:
|
829 |
+
- pred_boxes (array): Predicted bounding boxes.
|
830 |
+
- true_boxes (array): Ground truth bounding boxes.
|
831 |
+
- pred_labels (array): Predicted labels.
|
832 |
+
- true_labels (array): Ground truth labels.
|
833 |
+
- pred_keypoints (array): Predicted keypoints.
|
834 |
+
- true_keypoints (array): Ground truth keypoints.
|
835 |
+
- iou_threshold (float): IoU threshold for determining matches.
|
836 |
+
- distance_threshold (int): Distance threshold for considering a keypoint match.
|
837 |
+
|
838 |
+
Returns:
|
839 |
+
- tuple: True positives, false positives, false negatives, correct labels, incorrect labels, correct keypoints, incorrect keypoints, and reverted keypoints count.
|
840 |
+
"""
|
841 |
tp, fp, fn = 0, 0, 0
|
842 |
key_t, key_f = 0, 0
|
843 |
labels_t, labels_f = 0, 0
|
|
|
852 |
iou_val = iou(pred_box, true_box)
|
853 |
if iou_val >= iou_threshold:
|
854 |
if true_keypoints is not None and pred_keypoints is not None:
|
855 |
+
key_result, reverted = keypoints_measure(
|
856 |
+
pred_boxes, pred_box, true_boxes, true_box, pred_keypoints, true_keypoints, distance_threshold
|
857 |
+
)
|
858 |
key_t += key_result
|
859 |
key_f += 2 - key_result
|
860 |
if reverted:
|
|
|
877 |
|
878 |
|
879 |
def pred_4_evaluation(model, loader, score_threshold=0.5, iou_threshold=0.5, distance_threshold=5, key_correction=True, model_type='object'):
|
880 |
+
"""
|
881 |
+
Evaluate the model on a dataset using predictions for evaluation.
|
882 |
+
|
883 |
+
Parameters:
|
884 |
+
- model (torch.nn.Module): The model to evaluate.
|
885 |
+
- loader (torch.utils.data.DataLoader): DataLoader for the dataset.
|
886 |
+
- score_threshold (float): Score threshold for filtering predictions.
|
887 |
+
- iou_threshold (float): IoU threshold for determining matches.
|
888 |
+
- distance_threshold (int): Distance threshold for considering a keypoint match.
|
889 |
+
- key_correction (bool): Whether to apply keypoint correction.
|
890 |
+
- model_type (str): Type of model ('object' or 'arrow').
|
891 |
+
|
892 |
+
Returns:
|
893 |
+
- tuple: Evaluation results including true positives, false positives, false negatives, correct labels, incorrect labels, correct keypoints, incorrect keypoints, and reverted keypoints count.
|
894 |
+
"""
|
895 |
model.eval()
|
896 |
tp, fp, fn = 0, 0, 0
|
897 |
labels_t, labels_f = 0, 0
|
|
|
929 |
filtered_labels = []
|
930 |
filtered_keypoints = []
|
931 |
if 'keypoints' not in prediction:
|
932 |
+
# Create a list of zeros of length equal to the number of boxes
|
933 |
pred_keypoints = [np.zeros((2, 3)) for _ in range(len(pred_boxes))]
|
934 |
|
935 |
for box, score, label, keypoints in zip(pred_boxes, scores, pred_labels, pred_keypoints):
|
|
|
946 |
filtered_keypoints = None
|
947 |
true_keypoints = None
|
948 |
tp_img, fp_img, fn_img, labels_t_img, labels_f_img, key_t_img, key_f_img, reverted_img = evaluate_single_image(
|
949 |
+
filtered_boxes, true_boxes, filtered_labels, true_labels, filtered_keypoints, true_keypoints, iou_threshold, distance_threshold
|
950 |
+
)
|
951 |
|
952 |
tp += tp_img
|
953 |
fp += fp_img
|
|
|
960 |
|
961 |
return tp, fp, fn, labels_t, labels_f, key_t, key_f, reverted
|
962 |
|
|
|
963 |
|
964 |
+
def main_evaluation(model, test_loader, score_threshold=0.5, iou_threshold=0.5, distance_threshold=5, key_correction=True, model_type='object'):
|
965 |
+
"""
|
966 |
+
Main function to evaluate the model on the test dataset.
|
967 |
+
|
968 |
+
Parameters:
|
969 |
+
- model (torch.nn.Module): The model to evaluate.
|
970 |
+
- test_loader (torch.utils.data.DataLoader): DataLoader for the test dataset.
|
971 |
+
- score_threshold (float): Score threshold for filtering predictions.
|
972 |
+
- iou_threshold (float): IoU threshold for determining matches.
|
973 |
+
- distance_threshold (int): Distance threshold for considering a keypoint match.
|
974 |
+
- key_correction (bool): Whether to apply keypoint correction.
|
975 |
+
- model_type (str): Type of model ('object' or 'arrow').
|
976 |
+
|
977 |
+
Returns:
|
978 |
+
- tuple: Precision, recall, F1-score, key accuracy, and reverted accuracy.
|
979 |
+
"""
|
980 |
+
tp, fp, fn, labels_t, labels_f, key_t, key_f, reverted = pred_4_evaluation(
|
981 |
+
model, test_loader, score_threshold, iou_threshold, distance_threshold, key_correction, model_type
|
982 |
+
)
|
983 |
|
984 |
labels_precision = labels_t / (labels_t + labels_f) if (labels_t + labels_f) > 0 else 0
|
985 |
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
|
|
|
995 |
return labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy
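A quick numeric check of the formulas above, using hypothetical counts and the standard F1 definition:

tp, fp, fn = 80, 20, 10
precision = tp / (tp + fp)                                 # 0.80
recall = tp / (tp + fn)                                    # ~0.889
f1_score = 2 * precision * recall / (precision + recall)   # ~0.842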
|
996 |
|
997 |
|
|
|
998 |
def evaluate_model_by_class_single_image(pred_boxes, true_boxes, pred_labels, true_labels, class_tp, class_fp, class_fn, model_dict, iou_threshold=0.5):
|
999 |
+
"""
|
1000 |
+
Evaluate a single image's predictions on a per-class basis.
|
1001 |
+
|
1002 |
+
Parameters:
|
1003 |
+
- pred_boxes (array): Predicted bounding boxes.
|
1004 |
+
- true_boxes (array): Ground truth bounding boxes.
|
1005 |
+
- pred_labels (array): Predicted labels.
|
1006 |
+
- true_labels (array): Ground truth labels.
|
1007 |
+
- class_tp (dict): Dictionary of true positive counts per class.
|
1008 |
+
- class_fp (dict): Dictionary of false positive counts per class.
|
1009 |
+
- class_fn (dict): Dictionary of false negative counts per class.
|
1010 |
+
- model_dict (dict): Dictionary mapping model labels to indices.
|
1011 |
+
- iou_threshold (float): IoU threshold for determining matches.
|
1012 |
+
"""
|
1013 |
matched_true_boxes = set()
|
1014 |
for pred_idx, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
|
1015 |
match_found = False
|
|
|
1028 |
if idx not in matched_true_boxes:
|
1029 |
class_fn[model_dict[true_label]] += 1
|
1030 |
|
1031 |
+
|
1032 |
def pred_4_evaluation_per_class(model, loader, score_threshold=0.5, iou_threshold=0.5):
|
1033 |
+
"""
|
1034 |
+
Generate predictions for evaluation on a per-class basis.
|
1035 |
+
|
1036 |
+
Parameters:
|
1037 |
+
- model (torch.nn.Module): The model to evaluate.
|
1038 |
+
- loader (torch.utils.data.DataLoader): DataLoader for the dataset.
|
1039 |
+
- score_threshold (float): Score threshold for filtering predictions.
|
1040 |
+
- iou_threshold (float): IoU threshold for determining matches.
|
1041 |
+
|
1042 |
+
Yields:
|
1043 |
+
- tuple: Predicted and true boxes and labels for each batch.
|
1044 |
+
"""
|
1045 |
model.eval()
|
1046 |
with torch.no_grad():
|
1047 |
for images, targets_im in tqdm(loader, desc="Testing... "):
|
|
|
1071 |
|
1072 |
yield pred_boxes, true_boxes, pred_labels, true_labels
|
1073 |
|
1074 |
+
|
1075 |
def evaluate_model_by_class(model, test_loader, model_dict, score_threshold=0.5, iou_threshold=0.5):
|
1076 |
+
"""
|
1077 |
+
Evaluate the model's performance on a per-class basis for the entire dataset.
|
1078 |
+
|
1079 |
+
Parameters:
|
1080 |
+
- model (torch.nn.Module): The model to evaluate.
|
1081 |
+
- test_loader (torch.utils.data.DataLoader): DataLoader for the test dataset.
|
1082 |
+
- model_dict (dict): Dictionary mapping model labels to indices.
|
1083 |
+
- score_threshold (float): Score threshold for filtering predictions.
|
1084 |
+
- iou_threshold (float): IoU threshold for determining matches.
|
1085 |
+
|
1086 |
+
Returns:
|
1087 |
+
- tuple: Precision, recall, and F1-score per class.
|
1088 |
+
"""
|
1089 |
class_tp = {cls: 0 for cls in model_dict.values()}
|
1090 |
class_fp = {cls: 0 for cls in model_dict.values()}
|
1091 |
class_fn = {cls: 0 for cls in model_dict.values()}
|
|
|
1106 |
class_recall[cls] = recall
|
1107 |
class_f1_score[cls] = f1_score
|
1108 |
|
1109 |
+
return class_precision, class_recall, class_f1_score
|
modules/streamlit_utils.py
CHANGED
@@ -15,46 +15,64 @@ from modules.display import draw_stream
|
|
15 |
from modules.eval import full_prediction
|
16 |
from modules.train import get_faster_rcnn_model, get_arrow_model
|
17 |
from streamlit_image_comparison import image_comparison
|
18 |
-
|
19 |
from streamlit_image_annotation import detection
|
20 |
from modules.toXML import create_XML
|
21 |
from modules.eval import develop_prediction, generate_data
|
22 |
from modules.utils import class_dict, object_dict
|
23 |
-
|
24 |
from modules.htlm_webpage import display_bpmn_xml
|
25 |
from streamlit_cropper import st_cropper
|
26 |
from streamlit_image_select import image_select
|
27 |
from streamlit_js_eval import streamlit_js_eval
|
28 |
-
|
29 |
from modules.toWizard import create_wizard_file
|
30 |
from huggingface_hub import hf_hub_download
|
31 |
import time
|
32 |
-
|
33 |
from modules.toXML import get_size_elements
|
34 |
|
35 |
-
|
36 |
def get_memory_usage():
|
|
|
|
|
|
|
37 |
process = psutil.Process()
|
38 |
mem_info = process.memory_info()
|
39 |
return mem_info.rss / (1024 ** 2) # Return memory usage in MB
|
40 |
|
|
|
41 |
def clear_memory():
|
|
|
|
|
|
|
42 |
st.session_state.clear()
|
43 |
gc.collect()
|
44 |
|
45 |
-
|
46 |
# Function to read XML content from a file
|
47 |
def read_xml_file(filepath):
|
48 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
with open(filepath, 'r', encoding='utf-8') as file:
|
50 |
return file.read()
|
51 |
|
52 |
-
|
53 |
# Suppress the symlink warning
|
54 |
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
|
55 |
|
56 |
# Function to load the models only once and use session state to keep track of it
|
57 |
def load_models():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
with st.spinner('Loading model...'):
|
59 |
model_object = get_faster_rcnn_model(len(object_dict))
|
60 |
model_arrow = get_arrow_model(len(arrow_dict), 2)
|
@@ -71,7 +89,6 @@ def load_models():
|
|
71 |
|
72 |
# Load model arrow
|
73 |
if not Path(output_arrow).exists():
|
74 |
-
# Download model from Hugging Face Hub
|
75 |
model_arrow.load_state_dict(torch.load(model_arrow_path, map_location=device))
|
76 |
st.session_state.model_arrow = model_arrow
|
77 |
print('Model arrow downloaded from Hugging Face Hub')
|
@@ -82,22 +99,18 @@ def load_models():
|
|
82 |
print()
|
83 |
st.session_state.model_arrow = model_arrow
|
84 |
print('Model arrow loaded from local file')
|
85 |
-
|
86 |
|
87 |
# Load model object
|
88 |
if not Path(output_object).exists():
|
89 |
-
# Download model from Hugging Face Hub
|
90 |
model_object.load_state_dict(torch.load(model_object_path, map_location=device))
|
91 |
st.session_state.model_object = model_object
|
92 |
print('Model object downloaded from Hugging Face Hub')
|
93 |
-
# Save the model locally
|
94 |
torch.save(model_object.state_dict(), output_object)
|
95 |
elif 'model_object' not in st.session_state and Path(output_object).exists():
|
96 |
model_object.load_state_dict(torch.load(output_object, map_location=device))
|
97 |
print()
|
98 |
st.session_state.model_object = model_object
|
99 |
-
print('Model object loaded from local file')
|
100 |
-
|
101 |
|
102 |
# Move models to device
|
103 |
model_arrow.to(device)
|
@@ -110,6 +123,17 @@ def load_models():
|
|
110 |
|
111 |
# Function to prepare the image for processing
|
112 |
def prepare_image(image, pad=True, new_size=(1333, 1333)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
original_size = image.size
|
114 |
# Calculate scale to fit the new size while maintaining aspect ratio
|
115 |
scale = min(new_size[0] / original_size[0], new_size[1] / original_size[1])
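# Example: for a 2000x1000 upload and new_size=(1333, 1333),
# scale = min(1333/2000, 1333/1000) ~ 0.667, so the image becomes roughly 1333x667
# before any optional padding back to 1333x1333.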
|
@@ -128,6 +152,15 @@ def prepare_image(image, pad=True, new_size=(1333, 1333)):
|
|
128 |
|
129 |
# Function to display various options for image annotation
|
130 |
def display_options(image, score_threshold, is_mobile, screen_width):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
col1, col2, col3, col4, col5 = st.columns(5)
|
132 |
with col1:
|
133 |
write_class = st.toggle("Write Class", value=True)
|
@@ -157,7 +190,7 @@ def display_options(image, score_threshold, is_mobile, screen_width):
|
|
157 |
if is_mobile is True:
|
158 |
width = screen_width
|
159 |
else:
|
160 |
-
width = screen_width//2
|
161 |
|
162 |
# Display the original and annotated images side by side
|
163 |
image_comparison(
|
@@ -171,8 +204,25 @@ def display_options(image, score_threshold, is_mobile, screen_width):
|
|
171 |
|
172 |
# Function to perform inference on the uploaded image using the loaded models
|
173 |
def perform_inference(model_object, model_arrow, image, score_threshold, is_mobile, screen_width, iou_threshold=0.5, distance_treshold=30, percentage_text_dist_thresh=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
uploaded_image = prepare_image(image, pad=False)
|
175 |
-
|
176 |
img_tensor = F.to_tensor(prepare_image(image.convert('RGB')))
|
177 |
|
178 |
# Display original image
|
@@ -181,7 +231,7 @@ def perform_inference(model_object, model_arrow, image, score_threshold, is_mobi
|
|
181 |
if is_mobile is False:
|
182 |
width = screen_width
|
183 |
if is_mobile is False:
|
184 |
-
width = screen_width//2
|
185 |
image_placeholder.image(uploaded_image, caption='Original Image', width=width)
|
186 |
|
187 |
# Perform OCR on the uploaded image
|
@@ -193,9 +243,9 @@ def perform_inference(model_object, model_arrow, image, score_threshold, is_mobi
|
|
193 |
# Prediction
|
194 |
_, st.session_state.prediction = full_prediction(model_object, model_arrow, img_tensor, score_threshold=score_threshold, iou_threshold=iou_threshold, distance_treshold=distance_treshold)
|
195 |
|
196 |
-
#Mapping text to prediction
|
197 |
st.session_state.text_mapping = mapping_text(st.session_state.prediction, st.session_state.text_pred, print_sentences=False, percentage_thresh=percentage_text_dist_thresh)
|
198 |
-
|
199 |
# Remove the original image display
|
200 |
image_placeholder.empty()
|
201 |
|
@@ -204,24 +254,44 @@ def perform_inference(model_object, model_arrow, image, score_threshold, is_mobi
|
|
204 |
|
205 |
return image, st.session_state.prediction, st.session_state.text_mapping
|
206 |
|
|
|
207 |
@st.cache_data
|
208 |
def get_image(uploaded_file):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
return Image.open(uploaded_file).convert('RGB')
|
210 |
|
211 |
-
|
212 |
def configure_page():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
st.set_page_config(layout="wide")
|
214 |
screen_width = streamlit_js_eval(js_expressions='screen.width', want_output=True, key='SCR')
|
215 |
is_mobile = screen_width is not None and screen_width < 800
|
216 |
return is_mobile, screen_width
|
217 |
|
|
|
218 |
def display_banner(is_mobile):
|
219 |
-
# JavaScript expression to detect dark mode
|
220 |
-
dark_mode_js = """
|
221 |
-
(window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches)
|
222 |
"""
|
|
|
223 |
|
224 |
-
|
|
|
|
|
|
|
225 |
is_dark_mode = streamlit_js_eval(js_expressions=dark_mode_js, key='dark_mode')
|
226 |
|
227 |
if is_mobile:
|
@@ -235,16 +305,27 @@ def display_banner(is_mobile):
|
|
235 |
else:
|
236 |
st.image("./images/banner_desktop.png", use_column_width=True)
|
237 |
|
|
|
238 |
def display_title(is_mobile):
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
title = "Welcome on the BPMN AI model recognition app"
|
240 |
if is_mobile:
|
241 |
title = "Welcome on the mobile version of BPMN AI model recognition app"
|
242 |
st.title(title)
|
243 |
|
|
|
244 |
def display_sidebar():
|
|
|
|
|
|
|
245 |
st.sidebar.header("This BPMN AI model recognition is proposed by: \n ELCA in collaboration with EPFL.")
|
246 |
st.sidebar.subheader("Instructions:")
|
247 |
-
st.sidebar.text("1. Upload
|
248 |
st.sidebar.text("2. Crop the image \n (try to put the BPMN diagram \n in the center of the image)")
|
249 |
st.sidebar.text("3. Set the score threshold for\n prediction (default is 0.5)")
|
250 |
st.sidebar.text("4. Click on 'Launch Prediction'")
|
@@ -252,20 +333,20 @@ def display_sidebar():
|
|
252 |
st.sidebar.text("6. You can modify the result \n by clicking on:\n 'Method&Style modification'")
|
253 |
st.sidebar.text("7. You can change the scale for \n the XML file and the size of \n elements (default is 1.0)")
|
254 |
st.sidebar.text("8. You can modify with modeler \n and download the result in \n right format")
|
255 |
-
|
256 |
st.sidebar.subheader("If there is an error, try to:")
|
257 |
st.sidebar.text("1. Change the score threshold")
|
258 |
st.sidebar.text("2. Re-crop the image by placing\n the BPMN diagram in the\n center of the image")
|
259 |
st.sidebar.text("3. Re-Launch the prediction")
|
260 |
-
|
261 |
st.sidebar.subheader("You can close this sidebar")
|
262 |
-
|
263 |
for i in range(5):
|
264 |
st.sidebar.subheader("")
|
265 |
-
|
266 |
st.sidebar.subheader("Made with ❤️ by Benjamin.K")
|
267 |
|
|
|
268 |
def initialize_session_state():
|
|
|
|
|
|
|
269 |
if 'pool_bboxes' not in st.session_state:
|
270 |
st.session_state.pool_bboxes = []
|
271 |
if 'model_loaded' not in st.session_state:
|
@@ -275,7 +356,14 @@ def initialize_session_state():
|
|
275 |
load_models()
|
276 |
st.rerun()
|
277 |
|
|
|
278 |
def load_example_image():
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
with st.expander("Use example images"):
|
280 |
img_selected = image_select(
|
281 |
"If you have no image and just want to test the demo, click on one of these images",
|
@@ -287,10 +375,20 @@ def load_example_image():
|
|
287 |
)
|
288 |
return img_selected
|
289 |
|
|
|
290 |
def load_user_image(img_selected, is_mobile):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
if img_selected == './images/none.jpg':
|
292 |
img_selected = None
|
293 |
-
|
294 |
if img_selected is not None:
|
295 |
uploaded_file = img_selected
|
296 |
else:
|
@@ -300,13 +398,23 @@ def load_user_image(img_selected, is_mobile):
|
|
300 |
col1, col2 = st.columns(2)
|
301 |
with col1:
|
302 |
uploaded_file = st.file_uploader("Choose an image from my computer...", type=["jpg", "jpeg", "png"])
|
303 |
-
|
304 |
return uploaded_file
|
305 |
|
|
|
306 |
def display_image(uploaded_file, screen_width, is_mobile):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
307 |
if 'rotation_angle' not in st.session_state:
|
308 |
st.session_state.rotation_angle = 0 # Initialize the rotation angle in session state
|
309 |
-
|
310 |
if 'brightness' not in st.session_state:
|
311 |
st.session_state.brightness = 1.0 # Initialize brightness in session state
|
312 |
|
@@ -349,15 +457,23 @@ def display_image(uploaded_file, screen_width, is_mobile):
|
|
349 |
if not is_mobile:
|
350 |
cropped_image = crop_image(adjusted_image, original_image)
|
351 |
else:
|
352 |
-
st.image(adjusted_image, caption="Image", use_column_width=False, width=int(4/5 * screen_width))
|
353 |
cropped_image = original_image
|
354 |
|
355 |
return cropped_image
|
356 |
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
def crop_image(resized_image, original_image):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
marge = 10
|
362 |
cropped_box = st_cropper(
|
363 |
resized_image,
|
@@ -373,23 +489,50 @@ def crop_image(resized_image, original_image):
|
|
373 |
cropped_image = original_image.crop((x0, y0, x1, y1))
|
374 |
return cropped_image
|
375 |
|
|
|
376 |
def get_score_threshold(is_mobile):
|
|
|
|
|
|
|
|
|
|
|
|
|
377 |
col1, col2 = st.columns(2)
|
378 |
with col1:
|
379 |
-
st.session_state.score_threshold = st.slider("Set score threshold for prediction", min_value=0.0, max_value=1.0, value=0.5, step=0.05)
|
380 |
|
381 |
def launch_prediction(cropped_image, score_threshold, is_mobile, screen_width):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
st.session_state.crop_image = cropped_image
|
383 |
with st.spinner('Processing...'):
|
384 |
-
image, _
|
385 |
st.session_state.model_object, st.session_state.model_arrow, st.session_state.crop_image,
|
386 |
score_threshold, is_mobile, screen_width, iou_threshold=0.3, distance_treshold=30, percentage_text_dist_thresh=0.5
|
387 |
)
|
388 |
st.balloons()
|
389 |
return image
|
390 |
-
|
391 |
|
392 |
def modify_results(percentage_text_dist_thresh=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
with st.expander("Method & Style modification"):
|
394 |
label_list = list(object_dict.values())
|
395 |
if st.session_state.prediction['labels'][-1] == 6:
|
@@ -445,7 +588,6 @@ def modify_results(percentage_text_dist_thresh=0.5):
|
|
445 |
|
446 |
object_labels = np.array(object_labels)
|
447 |
|
448 |
-
|
449 |
if len(object_bboxes) == len(bboxes):
|
450 |
# Calculate absolute differences
|
451 |
abs_diff = np.abs(object_bboxes - bboxes)
|
@@ -456,7 +598,7 @@ def modify_results(percentage_text_dist_thresh=0.5):
|
|
456 |
changes = True
|
457 |
break
|
458 |
|
459 |
-
#
|
460 |
if not np.array_equal(object_labels, new_lab):
|
461 |
changes = True
|
462 |
else:
|
@@ -477,7 +619,6 @@ def modify_results(percentage_text_dist_thresh=0.5):
|
|
477 |
new_scores = np.concatenate((object_scores, arrow_score))
|
478 |
new_keypoints = np.concatenate((object_keypoints, arrow_keypoints))
|
479 |
|
480 |
-
|
481 |
boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict = develop_prediction(new_bbox, new_lab, new_scores, new_keypoints, class_dict)
|
482 |
|
483 |
st.session_state.prediction = generate_data(st.session_state.prediction['image'], boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict)
|
@@ -489,21 +630,35 @@ def modify_results(percentage_text_dist_thresh=0.5):
|
|
489 |
|
490 |
return True
|
491 |
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
def display_bpmn_modeler(is_mobile, screen_width):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
496 |
with st.spinner('Waiting for BPMN modeler...'):
|
497 |
st.session_state.bpmn_xml = create_XML(
|
498 |
st.session_state.prediction.copy(), st.session_state.text_mapping,
|
499 |
st.session_state.size_scale, st.session_state.scale
|
500 |
)
|
501 |
-
st.session_state.vizi_file = create_wizard_file(st.session_state.prediction.copy(), st.session_state.text_mapping)
|
502 |
|
|
|
|
|
503 |
display_bpmn_xml(st.session_state.bpmn_xml, st.session_state.vizi_file, is_mobile=is_mobile, screen_width=int(4/5 * screen_width))
|
504 |
|
505 |
-
|
506 |
def find_best_scale(pred, size_elements):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
boxes = pred['boxes']
|
508 |
labels = pred['labels']
|
509 |
|
@@ -535,6 +690,12 @@ def find_best_scale(pred, size_elements):
|
|
535 |
return best_scale
|
536 |
|
537 |
def modeler_options(is_mobile):
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
if not is_mobile:
|
539 |
with st.expander("Options for BPMN modeler"):
|
540 |
col1, col2 = st.columns(2)
|
@@ -545,4 +706,4 @@ def modeler_options(is_mobile):
|
|
545 |
st.session_state.size_scale = st.slider("Set size object scale for XML file", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
|
546 |
else:
|
547 |
st.session_state.scale = 1.0
|
548 |
-
st.session_state.size_scale = 1.0
|
|
|
15 |
from modules.eval import full_prediction
|
16 |
from modules.train import get_faster_rcnn_model, get_arrow_model
|
17 |
from streamlit_image_comparison import image_comparison
|
|
|
18 |
from streamlit_image_annotation import detection
|
19 |
from modules.toXML import create_XML
|
20 |
from modules.eval import develop_prediction, generate_data
|
21 |
from modules.utils import class_dict, object_dict
|
|
|
22 |
from modules.htlm_webpage import display_bpmn_xml
|
23 |
from streamlit_cropper import st_cropper
|
24 |
from streamlit_image_select import image_select
|
25 |
from streamlit_js_eval import streamlit_js_eval
|
|
|
26 |
from modules.toWizard import create_wizard_file
|
27 |
from huggingface_hub import hf_hub_download
|
28 |
import time
|
|
|
29 |
from modules.toXML import get_size_elements
|
30 |
|
31 |
+
# Function to get memory usage
|
32 |
def get_memory_usage():
|
33 |
+
"""
|
34 |
+
Returns the current memory usage of the process in MB.
|
35 |
+
"""
|
36 |
process = psutil.Process()
|
37 |
mem_info = process.memory_info()
|
38 |
return mem_info.rss / (1024 ** 2) # Return memory usage in MB
|
39 |
|
40 |
+
# Function to clear memory
|
41 |
def clear_memory():
|
42 |
+
"""
|
43 |
+
Clears the Streamlit session state and triggers garbage collection.
|
44 |
+
"""
|
45 |
st.session_state.clear()
|
46 |
gc.collect()
|
47 |
|
|
|
48 |
# Function to read XML content from a file
|
49 |
def read_xml_file(filepath):
|
50 |
+
"""
|
51 |
+
Reads and returns the content of an XML file.
|
52 |
+
|
53 |
+
Parameters:
|
54 |
+
- filepath (str): The path to the XML file.
|
55 |
+
|
56 |
+
Returns:
|
57 |
+
- str: The content of the XML file.
|
58 |
+
"""
|
59 |
with open(filepath, 'r', encoding='utf-8') as file:
|
60 |
return file.read()
|
61 |
|
|
|
62 |
# Suppress the symlink warning
|
63 |
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
|
64 |
|
65 |
# Function to load the models only once and use session state to keep track of it
|
66 |
def load_models():
|
67 |
+
"""
|
68 |
+
Loads the object and arrow detection models, either from the local file or
|
69 |
+
downloads from the Hugging Face Hub if not available locally. The models
|
70 |
+
are stored in the Streamlit session state.
|
71 |
+
|
72 |
+
Returns:
|
73 |
+
- model_object (torch.nn.Module): The loaded object detection model.
|
74 |
+
- model_arrow (torch.nn.Module): The loaded arrow detection model.
|
75 |
+
"""
|
76 |
with st.spinner('Loading model...'):
|
77 |
model_object = get_faster_rcnn_model(len(object_dict))
|
78 |
model_arrow = get_arrow_model(len(arrow_dict), 2)
|
|
|
89 |
|
90 |
# Load model arrow
|
91 |
if not Path(output_arrow).exists():
|
|
|
92 |
model_arrow.load_state_dict(torch.load(model_arrow_path, map_location=device))
|
93 |
st.session_state.model_arrow = model_arrow
|
94 |
print('Model arrow downloaded from Hugging Face Hub')
|
|
|
99 |
print()
|
100 |
st.session_state.model_arrow = model_arrow
|
101 |
print('Model arrow loaded from local file')
|
|
|
102 |
|
103 |
# Load model object
|
104 |
if not Path(output_object).exists():
|
|
|
105 |
model_object.load_state_dict(torch.load(model_object_path, map_location=device))
|
106 |
st.session_state.model_object = model_object
|
107 |
print('Model object downloaded from Hugging Face Hub')
|
|
|
108 |
torch.save(model_object.state_dict(), output_object)
|
109 |
elif 'model_object' not in st.session_state and Path(output_object).exists():
|
110 |
model_object.load_state_dict(torch.load(output_object, map_location=device))
|
111 |
print()
|
112 |
st.session_state.model_object = model_object
|
113 |
+
print('Model object loaded from local file')
|
|
|
114 |
|
115 |
# Move models to device
|
116 |
model_arrow.to(device)
|
|
|
123 |
|
124 |
# Function to prepare the image for processing
|
125 |
def prepare_image(image, pad=True, new_size=(1333, 1333)):
|
126 |
+
"""
|
127 |
+
Resizes and optionally pads the input image to a new size.
|
128 |
+
|
129 |
+
Parameters:
|
130 |
+
- image (PIL.Image): The image to be processed.
|
131 |
+
- pad (bool): Whether to pad the image to the new size.
|
132 |
+
- new_size (tuple): The target size for the image.
|
133 |
+
|
134 |
+
Returns:
|
135 |
+
- PIL.Image: The processed image.
|
136 |
+
"""
|
137 |
original_size = image.size
|
138 |
# Calculate scale to fit the new size while maintaining aspect ratio
|
139 |
scale = min(new_size[0] / original_size[0], new_size[1] / original_size[1])
|
|
|
152 |
|
153 |
# Function to display various options for image annotation
|
154 |
def display_options(image, score_threshold, is_mobile, screen_width):
|
155 |
+
"""
|
156 |
+
Displays various options for image annotation and draws the annotated image.
|
157 |
+
|
158 |
+
Parameters:
|
159 |
+
- image (PIL.Image): The image to be annotated.
|
160 |
+
- score_threshold (float): The score threshold for displaying annotations.
|
161 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
162 |
+
- screen_width (int): The width of the screen.
|
163 |
+
"""
|
164 |
col1, col2, col3, col4, col5 = st.columns(5)
|
165 |
with col1:
|
166 |
write_class = st.toggle("Write Class", value=True)
|
|
|
190 |
if is_mobile is True:
|
191 |
width = screen_width
|
192 |
else:
|
193 |
+
width = screen_width // 2
|
194 |
|
195 |
# Display the original and annotated images side by side
|
196 |
image_comparison(
|
|
|
204 |
|
205 |
# Function to perform inference on the uploaded image using the loaded models
|
206 |
def perform_inference(model_object, model_arrow, image, score_threshold, is_mobile, screen_width, iou_threshold=0.5, distance_treshold=30, percentage_text_dist_thresh=0.5):
|
207 |
+
"""
|
208 |
+
Performs inference on the uploaded image using the loaded models and updates
|
209 |
+
the session state with predictions and text mappings.
|
210 |
+
|
211 |
+
Parameters:
|
212 |
+
- model_object (torch.nn.Module): The object detection model.
|
213 |
+
- model_arrow (torch.nn.Module): The arrow detection model.
|
214 |
+
- image (PIL.Image): The uploaded image.
|
215 |
+
- score_threshold (float): The score threshold for displaying annotations.
|
216 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
217 |
+
- screen_width (int): The width of the screen.
|
218 |
+
- iou_threshold (float): The IoU threshold for filtering boxes.
|
219 |
+
- distance_treshold (int): The distance threshold for matching keypoints.
|
220 |
+
- percentage_text_dist_thresh (float): The percentage distance threshold for text mapping.
|
221 |
+
|
222 |
+
Returns:
|
223 |
+
- tuple: The processed image, prediction, and text mapping.
|
224 |
+
"""
|
225 |
uploaded_image = prepare_image(image, pad=False)
|
|
|
226 |
img_tensor = F.to_tensor(prepare_image(image.convert('RGB')))
|
227 |
|
228 |
# Display original image
|
|
|
231 |
if is_mobile is False:
|
232 |
width = screen_width
|
233 |
if is_mobile is False:
|
234 |
+
width = screen_width // 2
|
235 |
image_placeholder.image(uploaded_image, caption='Original Image', width=width)
|
236 |
|
237 |
# Perform OCR on the uploaded image
|
|
|
243 |
# Prediction
|
244 |
_, st.session_state.prediction = full_prediction(model_object, model_arrow, img_tensor, score_threshold=score_threshold, iou_threshold=iou_threshold, distance_treshold=distance_treshold)
|
245 |
|
246 |
+
# Mapping text to prediction
|
247 |
st.session_state.text_mapping = mapping_text(st.session_state.prediction, st.session_state.text_pred, print_sentences=False, percentage_thresh=percentage_text_dist_thresh)
|
248 |
+
|
249 |
# Remove the original image display
|
250 |
image_placeholder.empty()
|
251 |
|
|
|
254 |
|
255 |
return image, st.session_state.prediction, st.session_state.text_mapping
|
256 |
|
257 |
+
# Function to get the image from the uploaded file
|
258 |
@st.cache_data
|
259 |
def get_image(uploaded_file):
|
260 |
+
"""
|
261 |
+
Opens and converts the uploaded image file to RGB format.
|
262 |
+
|
263 |
+
Parameters:
|
264 |
+
- uploaded_file: The uploaded image file.
|
265 |
+
|
266 |
+
Returns:
|
267 |
+
- PIL.Image: The opened and converted image.
|
268 |
+
"""
|
269 |
return Image.open(uploaded_file).convert('RGB')
|
270 |
|
271 |
+
# Function to configure the Streamlit page
|
272 |
def configure_page():
|
273 |
+
"""
|
274 |
+
Configures the Streamlit page layout and returns the screen width
|
275 |
+
and a flag indicating if the device is mobile.
|
276 |
+
|
277 |
+
Returns:
|
278 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
279 |
+
- screen_width (int): The width of the screen.
|
280 |
+
"""
|
281 |
st.set_page_config(layout="wide")
|
282 |
screen_width = streamlit_js_eval(js_expressions='screen.width', want_output=True, key='SCR')
|
283 |
is_mobile = screen_width is not None and screen_width < 800
|
284 |
return is_mobile, screen_width
|
285 |
|
286 |
+
# Function to display the banner based on device type and theme
|
287 |
def display_banner(is_mobile):
|
|
|
|
|
|
|
288 |
"""
|
289 |
+
Displays the appropriate banner image based on device type and dark mode preference.
|
290 |
|
291 |
+
Parameters:
|
292 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
293 |
+
"""
|
294 |
+
dark_mode_js = "(window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches)"
|
295 |
is_dark_mode = streamlit_js_eval(js_expressions=dark_mode_js, key='dark_mode')
|
296 |
|
297 |
if is_mobile:
|
|
|
305 |
else:
|
306 |
st.image("./images/banner_desktop.png", use_column_width=True)
|
307 |
|
308 |
+
# Function to display the title based on device type
|
309 |
def display_title(is_mobile):
|
310 |
+
"""
|
311 |
+
Displays the title of the app based on device type.
|
312 |
+
|
313 |
+
Parameters:
|
314 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
315 |
+
"""
|
316 |
title = "Welcome on the BPMN AI model recognition app"
|
317 |
if is_mobile:
|
318 |
title = "Welcome on the mobile version of BPMN AI model recognition app"
|
319 |
st.title(title)
|
320 |
|
321 |
+
# Function to display the sidebar with instructions and information
|
322 |
def display_sidebar():
|
323 |
+
"""
|
324 |
+
Displays the sidebar with instructions and information about the app.
|
325 |
+
"""
|
326 |
st.sidebar.header("This BPMN AI model recognition is proposed by: \n ELCA in collaboration with EPFL.")
|
327 |
st.sidebar.subheader("Instructions:")
|
328 |
+
st.sidebar.text("1. Upload your image")
|
329 |
st.sidebar.text("2. Crop the image \n (try to put the BPMN diagram \n in the center of the image)")
|
330 |
st.sidebar.text("3. Set the score threshold for\n prediction (default is 0.5)")
|
331 |
st.sidebar.text("4. Click on 'Launch Prediction'")
|
|
|
333 |
st.sidebar.text("6. You can modify the result \n by clicking on:\n 'Method&Style modification'")
|
334 |
st.sidebar.text("7. You can change the scale for \n the XML file and the size of \n elements (default is 1.0)")
|
335 |
st.sidebar.text("8. You can modify with modeler \n and download the result in \n right format")
|
|
|
336 |
st.sidebar.subheader("If there is an error, try to:")
|
337 |
st.sidebar.text("1. Change the score threshold")
|
338 |
st.sidebar.text("2. Re-crop the image by placing\n the BPMN diagram in the\n center of the image")
|
339 |
st.sidebar.text("3. Re-Launch the prediction")
|
|
|
340 |
st.sidebar.subheader("You can close this sidebar")
|
|
|
341 |
for i in range(5):
|
342 |
st.sidebar.subheader("")
|
|
|
343 |
st.sidebar.subheader("Made with ❤️ by Benjamin.K")
|
344 |
|
345 |
+
# Function to initialize session state variables
|
346 |
def initialize_session_state():
|
347 |
+
"""
|
348 |
+
Initializes the session state variables for the app.
|
349 |
+
"""
|
350 |
if 'pool_bboxes' not in st.session_state:
|
351 |
st.session_state.pool_bboxes = []
|
352 |
if 'model_loaded' not in st.session_state:
|
|
|
356 |
load_models()
|
357 |
st.rerun()
|
358 |
|
359 |
+
# Function to load example images for testing
|
360 |
def load_example_image():
|
361 |
+
"""
|
362 |
+
Loads example images for testing the app and returns the selected image.
|
363 |
+
|
364 |
+
Returns:
|
365 |
+
- str: The path to the selected example image.
|
366 |
+
"""
|
367 |
with st.expander("Use example images"):
|
368 |
img_selected = image_select(
|
369 |
"If you have no image and just want to test the demo, click on one of these images",
|
|
|
375 |
)
|
376 |
return img_selected
|
377 |
|
378 |
+
# Function to load user-uploaded images or selected example images
|
379 |
def load_user_image(img_selected, is_mobile):
|
380 |
+
"""
|
381 |
+
Loads the user-uploaded image or the selected example image.
|
382 |
+
|
383 |
+
Parameters:
|
384 |
+
- img_selected (str): The path to the selected example image.
|
385 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
386 |
+
|
387 |
+
Returns:
|
388 |
+
- str: The path to the uploaded image file.
|
389 |
+
"""
|
390 |
if img_selected == './images/none.jpg':
|
391 |
img_selected = None
|
|
|
392 |
if img_selected is not None:
|
393 |
uploaded_file = img_selected
|
394 |
else:
|
|
|
398 |
col1, col2 = st.columns(2)
|
399 |
with col1:
|
400 |
uploaded_file = st.file_uploader("Choose an image from my computer...", type=["jpg", "jpeg", "png"])
|
|
|
401 |
return uploaded_file
|
402 |
|
403 |
+
# Function to display the uploaded or example image
|
404 |
def display_image(uploaded_file, screen_width, is_mobile):
|
405 |
+
"""
|
406 |
+
Displays the uploaded or selected example image with options to rotate and adjust brightness.
|
407 |
+
|
408 |
+
Parameters:
|
409 |
+
- uploaded_file: The uploaded image file.
|
410 |
+
- screen_width (int): The width of the screen.
|
411 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
412 |
+
|
413 |
+
Returns:
|
414 |
+
- PIL.Image: The cropped and adjusted image.
|
415 |
+
"""
|
416 |
if 'rotation_angle' not in st.session_state:
|
417 |
st.session_state.rotation_angle = 0 # Initialize the rotation angle in session state
|
|
|
418 |
if 'brightness' not in st.session_state:
|
419 |
st.session_state.brightness = 1.0 # Initialize brightness in session state
|
420 |
|
|
|
457 |
if not is_mobile:
|
458 |
cropped_image = crop_image(adjusted_image, original_image)
|
459 |
else:
|
460 |
+
st.image(adjusted_image, caption="Image", use_column_width=False, width=int(4 / 5 * screen_width))
|
461 |
cropped_image = original_image
|
462 |
|
463 |
return cropped_image
|
464 |
|
465 |
+
# Function to crop the image
|
|
|
|
|
466 |
def crop_image(resized_image, original_image):
|
467 |
+
"""
|
468 |
+
Crops the resized image based on user input.
|
469 |
+
|
470 |
+
Parameters:
|
471 |
+
- resized_image (PIL.Image): The resized image.
|
472 |
+
- original_image (PIL.Image): The original image.
|
473 |
+
|
474 |
+
Returns:
|
475 |
+
- PIL.Image: The cropped image.
|
476 |
+
"""
|
477 |
marge = 10
|
478 |
cropped_box = st_cropper(
|
479 |
resized_image,
|
|
|
489 |
cropped_image = original_image.crop((x0, y0, x1, y1))
|
490 |
return cropped_image
|
491 |
|
492 |
+
# Function to get the score threshold for prediction
|
493 |
def get_score_threshold(is_mobile):
|
494 |
+
"""
|
495 |
+
Displays a slider to set the score threshold for prediction.
|
496 |
+
|
497 |
+
Parameters:
|
498 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
499 |
+
"""
|
500 |
col1, col2 = st.columns(2)
|
501 |
with col1:
|
502 |
+
st.session_state.score_threshold = st.slider("Set score threshold for prediction", min_value=0.0, max_value=1.0, value=0.5, step=0.05)
|
503 |
|
504 |
def launch_prediction(cropped_image, score_threshold, is_mobile, screen_width):
|
505 |
+
"""
|
506 |
+
Launches the prediction process on the cropped image and displays balloons upon completion.
|
507 |
+
|
508 |
+
Parameters:
|
509 |
+
- cropped_image (PIL.Image): The cropped image to be processed.
|
510 |
+
- score_threshold (float): The score threshold for predictions.
|
511 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
512 |
+
- screen_width (int): The width of the screen.
|
513 |
+
|
514 |
+
Returns:
|
515 |
+
- PIL.Image: The image after performing inference.
|
516 |
+
"""
|
517 |
st.session_state.crop_image = cropped_image
|
518 |
with st.spinner('Processing...'):
|
519 |
+
image, _, _ = perform_inference(
|
520 |
st.session_state.model_object, st.session_state.model_arrow, st.session_state.crop_image,
|
521 |
score_threshold, is_mobile, screen_width, iou_threshold=0.3, distance_treshold=30, percentage_text_dist_thresh=0.5
|
522 |
)
|
523 |
st.balloons()
|
524 |
return image
|
|
|
525 |
|
526 |
def modify_results(percentage_text_dist_thresh=0.5):
|
527 |
+
"""
|
528 |
+
Allows the user to modify the results using method and style modification.
|
529 |
+
|
530 |
+
Parameters:
|
531 |
+
- percentage_text_dist_thresh (float): Threshold for mapping text to predictions based on percentage distance.
|
532 |
+
|
533 |
+
Returns:
|
534 |
+
- bool: True if changes are detected and modifications are made, otherwise False.
|
535 |
+
"""
|
536 |
with st.expander("Method & Style modification"):
|
537 |
label_list = list(object_dict.values())
|
538 |
if st.session_state.prediction['labels'][-1] == 6:
|
|
|
588 |
|
589 |
object_labels = np.array(object_labels)
|
590 |
|
|
|
591 |
if len(object_bboxes) == len(bboxes):
|
592 |
# Calculate absolute differences
|
593 |
abs_diff = np.abs(object_bboxes - bboxes)
|
|
|
598 |
changes = True
|
599 |
break
|
600 |
|
601 |
+
# Check if labels are the same
|
602 |
if not np.array_equal(object_labels, new_lab):
|
603 |
changes = True
|
604 |
else:
|
|
|
619 |
new_scores = np.concatenate((object_scores, arrow_score))
|
620 |
new_keypoints = np.concatenate((object_keypoints, arrow_keypoints))
|
621 |
|
|
|
622 |
boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict = develop_prediction(new_bbox, new_lab, new_scores, new_keypoints, class_dict)
|
623 |
|
624 |
st.session_state.prediction = generate_data(st.session_state.prediction['image'], boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict)
|
|
|
630 |
|
631 |
return True
|
632 |
|
|
|
|
|
|
|
633 |
def display_bpmn_modeler(is_mobile, screen_width):
|
634 |
+
"""
|
635 |
+
Displays the BPMN modeler with the current prediction and text mapping.
|
636 |
+
|
637 |
+
Parameters:
|
638 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
639 |
+
- screen_width (int): The width of the screen.
|
640 |
+
"""
|
641 |
with st.spinner('Waiting for BPMN modeler...'):
|
642 |
st.session_state.bpmn_xml = create_XML(
|
643 |
st.session_state.prediction.copy(), st.session_state.text_mapping,
|
644 |
st.session_state.size_scale, st.session_state.scale
|
645 |
)
|
|
|
646 |
|
647 |
+
st.session_state.vizi_file = create_wizard_file(st.session_state.prediction.copy(), st.session_state.text_mapping)
|
648 |
+
|
649 |
display_bpmn_xml(st.session_state.bpmn_xml, st.session_state.vizi_file, is_mobile=is_mobile, screen_width=int(4/5 * screen_width))
|
650 |
|
|
|
651 |
def find_best_scale(pred, size_elements):
|
652 |
+
"""
|
653 |
+
Finds the best scale for the elements in the prediction.
|
654 |
+
|
655 |
+
Parameters:
|
656 |
+
- pred (dict): The prediction data.
|
657 |
+
- size_elements (dict): The size elements dictionary.
|
658 |
+
|
659 |
+
Returns:
|
660 |
+
- float: The best scale for the elements.
|
661 |
+
"""
|
662 |
boxes = pred['boxes']
|
663 |
labels = pred['labels']
|
664 |
|
|
|
690 |
return best_scale
|
691 |
|
692 |
def modeler_options(is_mobile):
|
693 |
+
"""
|
694 |
+
Displays options for the BPMN modeler.
|
695 |
+
|
696 |
+
Parameters:
|
697 |
+
- is_mobile (bool): Flag indicating if the device is mobile.
|
698 |
+
"""
|
699 |
if not is_mobile:
|
700 |
with st.expander("Options for BPMN modeler"):
|
701 |
col1, col2 = st.columns(2)
|
|
|
706 |
st.session_state.size_scale = st.slider("Set size object scale for XML file", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
|
707 |
else:
|
708 |
st.session_state.scale = 1.0
|
709 |
+
st.session_state.size_scale = 1.0
|
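For readers skimming the diff, a minimal standalone sketch of the aspect-ratio resize idea that the new prepare_image docstring above describes. The helper name resize_with_padding, the white canvas, and the top-left paste position are illustrative assumptions, not code from the repository:

from PIL import Image

def resize_with_padding(image: Image.Image, target=(1333, 1333)) -> Image.Image:
    # Scale so the image fits inside the target while keeping its aspect ratio
    scale = min(target[0] / image.size[0], target[1] / image.size[1])
    new_size = (int(image.size[0] * scale), int(image.size[1] * scale))
    resized = image.resize(new_size)
    # Pad by pasting the resized image onto a target-sized white canvas
    canvas = Image.new('RGB', target, (255, 255, 255))
    canvas.paste(resized, (0, 0))
    return canvas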
modules/toWizard.py
CHANGED
@@ -4,13 +4,31 @@ from xml.dom import minidom
from modules.utils import error
from modules.OCR import analyze_sentiment

def rescale(scale, boxes):
    """
    Rescale the coordinates of the bounding boxes by a given scale factor.

    Args:
        scale (float): The scale factor to apply.
        boxes (list): List of bounding boxes to be rescaled.

    Returns:
        list: Rescaled bounding boxes.
    """
    for i in range(len(boxes)):
        boxes[i] = [boxes[i][0] * scale, boxes[i][1] * scale, boxes[i][2] * scale, boxes[i][3] * scale]
    return boxes

def create_BPMN_id(data):
    """
    Create unique BPMN IDs for each element in the data based on their types.

    Args:
        data (dict): Dictionary containing labels and links of elements.

    Returns:
        dict: Updated data with BPMN IDs assigned.
    """
    enum_end, enum_start, enum_task, enum_sequence, enum_dataflow, enum_messflow, enum_messageEvent, enum_exclusiveGateway, enum_parallelGateway, enum_pool = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    BPMN_name = [class_dict[data['labels'][i]] for i in range(len(data['labels']))]
    for idx, Bpmn_id in enumerate(BPMN_name):
@@ -49,15 +67,35 @@ def create_BPMN_id(data):
    return data

def check_end(link):
    """
    Check if a link represents an end event.

    Args:
        link (tuple): A link containing indices of connected elements.

    Returns:
        bool: True if the link represents an end event, False otherwise.
    """
    if link[1] is None:
        return True
    return False

def connect(data, text_mapping, i):
    """
    Connect elements based on their links and generate the corresponding text mapping.

    Args:
        data (dict): Data containing links and BPMN IDs.
        text_mapping (dict): Mapping of BPMN IDs to their text descriptions.
        i (int): Index of the current element.

    Returns:
        tuple: Current text, next texts, and next ID.
    """
    next_text = []
    target_idx = data['links'][i][1]
    # Check if the target index is valid
    if target_idx == None or target_idx >= len(data['links']):
        error('There may be an error with the Vizi file, care when you download it.')
        return None, None, None
@@ -80,11 +118,30 @@
    return current_text, next_text, next_id

def check_start(val):
    """
    Check if a link represents a start event.

    Args:
        val (tuple): A link containing indices of connected elements.

    Returns:
        bool: True if the link represents a start event, False otherwise.
    """
    if val[0] is None:
        return True
    return False

def find_merge(bpmn_id, links):
    """
    Identify merge points in the BPMN diagram.

    Args:
        bpmn_id (list): List of BPMN IDs.
        links (list): List of links between elements.

    Returns:
        list: List indicating merge points.
    """
    merge = []
    for idx, link in enumerate(links):
        next_element = link[1]
@@ -104,7 +161,7 @@ def find_merge(bpmn_id, links):
        if element is None:
            merge_elements[idx] = False
            continue
        # Count how many times the element is in the list
        count = merge.count(element)
        if count > 1:
            merge_elements[idx] = True
@@ -114,6 +171,17 @@
    return merge_elements

def find_positive_end(bpmn_ids, links, text_mapping):
    """
    Find the positive end event based on sentiment analysis.

    Args:
        bpmn_ids (list): List of BPMN IDs.
        links (list): List of links between elements.
        text_mapping (dict): Mapping of BPMN IDs to their text descriptions.

    Returns:
        str: BPMN ID of the positive end event.
    """
    emotion_data = []
    for idx, bpmn_id in enumerate(bpmn_ids):
        if idx >= len(links):
@@ -130,6 +198,15 @@
    return sorted_emotions[0][0] if len(sorted_emotions) > 0 else None

def find_best_direction(texts_list):
    """
    Find the best direction based on sentiment analysis.

    Args:
        texts_list (list): List of texts to analyze.

    Returns:
        str: Text with the best (positive) sentiment.
    """
    emotion_data = []
    for text in texts_list:
        highest_emotion, highest_score = analyze_sentiment(text)
@@ -141,18 +218,24 @@
    return sorted_emotions[0][0] if len(sorted_emotions) > 0 else None

def create_wizard_file(data, text_mapping):
    """
    Create a wizard file for BPMN modeling based on the provided data and text mappings.

    Args:
        data (dict): Data containing BPMN elements and their properties.
        text_mapping (dict): Mapping of BPMN IDs to their text descriptions.

    Returns:
        str: Pretty-printed XML string of the wizard file.
    """
    not_change = ['pool','sequenceFlow','messageFlow','dataAssociation']

    # Add a name into the text_mapping when there is no name
    for idx, key in enumerate(text_mapping.keys()):
        if text_mapping[key] == '' and key.split('_')[0] not in not_change:
            text_mapping[key] = f'unnamed_{key}'

    root = ET.Element('methodAndStyleWizard')

    modelName = ET.SubElement(root, 'modelName')
@@ -179,7 +262,7 @@
        eventType = 'None'
        if idx >= len(data['links']):
            continue
        if check_start(data['links'][idx]) and (element_type == 'event' or element_type == 'message'):
            if text_mapping[Bpmn_id] == '':
                text_mapping[Bpmn_id] = 'start'
            startEvent = ET.SubElement(root, 'startEvent', attrib={'name': text_mapping[Bpmn_id], 'eventType': eventType, 'isRegular': 'True'})
@@ -191,8 +274,7 @@
    positive_end = find_positive_end(data['BPMN_id'], data['links'], text_mapping)
    if positive_end is not None:
        print("Best end is: ", text_mapping[positive_end])

    # Add end states event to the collaboration element
    for idx, Bpmn_id in enumerate(data['BPMN_id']):
@@ -208,7 +290,6 @@
        else:
            ET.SubElement(endEvents, 'endState', attrib={'name': text_mapping[Bpmn_id], 'eventType': 'None', 'isRegular': 'False'})

    # Add activities to the collaboration element
    activities = ET.SubElement(root, 'activities')
    for idx, activity_name in enumerate(data['BPMN_id']):
@@ -269,7 +350,7 @@
    ET.SubElement(root, 'participants')

    # Pretty print the XML
    pwm_str = ET.tostring(root, encoding='utf-8', method='xml')
    pretty_pwm_str = minidom.parseString(pwm_str).toprettyxml(indent="  ")

    return pretty_pwm_str
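As a standalone illustration of the ElementTree-to-minidom pretty-printing pattern that create_wizard_file now ends with (this snippet is not the app's code, just the same idiom in isolation):

import xml.etree.ElementTree as ET
from xml.dom import minidom

# Build a tiny element tree and pretty-print it, mirroring the end of create_wizard_file
root = ET.Element('methodAndStyleWizard')
ET.SubElement(root, 'modelName').text = 'example'
raw = ET.tostring(root, encoding='utf-8', method='xml')
print(minidom.parseString(raw).toprettyxml(indent="  "))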
modules/toXML.py
CHANGED
@@ -7,7 +7,16 @@ from xml.dom import minidom
|
|
7 |
import numpy as np
|
8 |
|
9 |
def find_position(pool_index, BPMN_id):
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
if pool_index in BPMN_id:
|
12 |
position = BPMN_id.index(pool_index)
|
13 |
else:
|
@@ -18,6 +27,16 @@ def find_position(pool_index, BPMN_id):
|
|
18 |
|
19 |
# Calculate the center of each bounding box and group them by pool
|
20 |
def calculate_centers_and_group_by_pool(pred, class_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
pool_groups = {}
|
22 |
for pool_index, element_indices in pred['pool_dict'].items():
|
23 |
pool_groups[pool_index] = []
|
@@ -26,12 +45,23 @@ def calculate_centers_and_group_by_pool(pred, class_dict):
|
|
26 |
continue
|
27 |
if class_dict[pred['labels'][i]] not in ['dataObject', 'dataStore']:
|
28 |
x1, y1, x2, y2 = pred['boxes'][i]
|
29 |
-
center = [(x1 + x2) / 2, (y1 + y2) / 2]
|
30 |
pool_groups[pool_index].append((center, i))
|
31 |
return pool_groups
|
32 |
|
33 |
# Group centers within a specified range
|
34 |
def group_centers(centers, axis, range_=50):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
groups = []
|
36 |
while centers:
|
37 |
center, idx = centers.pop(0)
|
@@ -45,18 +75,38 @@ def group_centers(centers, axis, range_=50):
|
|
45 |
|
46 |
# Align the elements within each pool
|
47 |
def align_elements_within_pool(modified_pred, pool_groups, class_dict, size):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
for pool_index, centers in pool_groups.items():
|
|
|
49 |
y_groups = group_centers(centers.copy(), axis=1)
|
50 |
align_y_coordinates(modified_pred, y_groups, class_dict, size)
|
51 |
|
|
|
52 |
centers = recalculate_centers(modified_pred, y_groups)
|
53 |
x_groups = group_centers(centers.copy(), axis=0)
|
54 |
align_x_coordinates(modified_pred, x_groups, class_dict, size)
|
55 |
|
56 |
# Align the y-coordinates of the centers of grouped bounding boxes
|
57 |
def align_y_coordinates(modified_pred, y_groups, class_dict, size):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
for group in y_groups:
|
59 |
-
avg_y = sum([c[0][1] for c in group]) / len(group)
|
60 |
for (center, idx) in group:
|
61 |
label = class_dict[modified_pred['labels'][idx]]
|
62 |
if label in size:
|
@@ -70,18 +120,37 @@ def align_y_coordinates(modified_pred, y_groups, class_dict, size):
|
|
70 |
|
71 |
# Recalculate centers after alignment
|
72 |
def recalculate_centers(modified_pred, groups):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
centers = []
|
74 |
for group in groups:
|
75 |
for center, idx in group:
|
76 |
x1, y1, x2, y2 = modified_pred['boxes'][idx]
|
77 |
-
center = [(x1 + x2) / 2, (y1 + y2) / 2]
|
78 |
centers.append((center, idx))
|
79 |
return centers
|
80 |
|
81 |
# Align the x-coordinates of the centers of grouped bounding boxes
|
82 |
def align_x_coordinates(modified_pred, x_groups, class_dict, size):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
for group in x_groups:
|
84 |
-
avg_x = sum([c[0][0] for c in group]) / len(group)
|
85 |
for (center, idx) in group:
|
86 |
label = class_dict[modified_pred['labels'][idx]]
|
87 |
if label in size:
|
@@ -95,6 +164,13 @@ def align_x_coordinates(modified_pred, x_groups, class_dict, size):
|
|
95 |
|
96 |
# Expand the pool bounding boxes to fit the aligned elements
|
97 |
def expand_pool_bounding_boxes(modified_pred, size_elements):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
for idx, (pool_index, keep_elements) in enumerate(modified_pred['pool_dict'].items()):
|
99 |
if len(keep_elements) != 0:
|
100 |
marge = size_elements['task'][1] // 2
|
@@ -114,10 +190,18 @@ def expand_pool_bounding_boxes(modified_pred, size_elements):
|
|
114 |
error("The pool is maybe too small, please add more elements or increase the scale by zooming on the image.")
|
115 |
continue
|
116 |
|
|
|
117 |
modified_pred['boxes'][position] = [min_x - marge, min_y - marge//2, min_x + pool_width + marge, min_y + pool_height + marge//2]
|
118 |
|
119 |
# Adjust left and right boundaries of all pools
|
120 |
def adjust_pool_boundaries(modified_pred, pred):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
min_left, max_right = 0, 0
|
122 |
for pool_index, element_indices in pred['pool_dict'].items():
|
123 |
position = find_position(pool_index, modified_pred['BPMN_id'])
|
@@ -140,10 +224,22 @@ def adjust_pool_boundaries(modified_pred, pred):
|
|
140 |
x1 = min_left
|
141 |
if x2 < max_right:
|
142 |
x2 = max_right
|
|
|
143 |
modified_pred['boxes'][position] = [x1, y1, x2, y2]
|
144 |
|
145 |
# Main function to align boxes
|
146 |
def align_boxes(pred, size, class_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
modified_pred = copy.deepcopy(pred)
|
148 |
pool_groups = calculate_centers_and_group_by_pool(pred, class_dict)
|
149 |
align_elements_within_pool(modified_pred, pool_groups, class_dict, size)
|
@@ -154,9 +250,20 @@ def align_boxes(pred, size, class_dict):
|
|
154 |
|
155 |
return modified_pred['boxes']
|
156 |
|
157 |
-
|
158 |
# Function to create a BPMN XML file from prediction results
|
159 |
def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
namespaces = {
|
161 |
'bpmn': 'http://www.omg.org/spec/BPMN/20100524/MODEL',
|
162 |
'bpmndi': 'http://www.omg.org/spec/BPMN/20100524/DI',
|
@@ -165,7 +272,6 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
165 |
'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
|
166 |
}
|
167 |
|
168 |
-
|
169 |
definitions = ET.Element('bpmn:definitions', {
|
170 |
'xmlns:xsi': namespaces['xsi'],
|
171 |
'xmlns:bpmn': namespaces['bpmn'],
|
@@ -176,14 +282,13 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
176 |
'id': "simpleExample"
|
177 |
})
|
178 |
|
179 |
-
|
180 |
size_elements = get_size_elements(size_scale)
|
181 |
|
182 |
-
#
|
183 |
if len(full_pred['pool_dict']) == 0 or (len(full_pred['pool_dict']) == 1 and len(next(iter(full_pred['pool_dict'].values()))) == len(full_pred['labels'])):
|
184 |
full_pred, text_mapping = create_big_pool(full_pred, text_mapping, size_elements)
|
185 |
|
186 |
-
#
|
187 |
old_boxes = copy.deepcopy(full_pred)
|
188 |
|
189 |
# Create BPMN collaboration element
|
@@ -191,16 +296,16 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
191 |
|
192 |
# Create BPMN process elements
|
193 |
process = []
|
194 |
-
for idx in range
|
195 |
-
process_id = f'process_{idx+1}'
|
196 |
process.append(ET.SubElement(definitions, 'bpmn:process', id=process_id, isExecutable='false'))
|
197 |
|
198 |
bpmndi = ET.SubElement(definitions, 'bpmndi:BPMNDiagram', id='BPMNDiagram_1')
|
199 |
bpmnplane = ET.SubElement(bpmndi, 'bpmndi:BPMNPlane', id='BPMNPlane_1', bpmnElement='collaboration_1')
|
200 |
|
|
|
201 |
full_pred['boxes'] = rescale_boxes(scale, old_boxes['boxes'])
|
202 |
full_pred['boxes'] = align_boxes(full_pred, size_elements, class_dict)
|
203 |
-
|
204 |
|
205 |
# Add diagram elements for each pool
|
206 |
for idx, (pool_index, keep_elements) in enumerate(full_pred['pool_dict'].items()):
|
@@ -208,8 +313,6 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
208 |
pool = ET.SubElement(collaboration, 'bpmn:participant', id=pool_id, processRef=f'process_{idx+1}', name=text_mapping[pool_index])
|
209 |
|
210 |
position = find_position(pool_index, full_pred['BPMN_id'])
|
211 |
-
# Calculate the bounding box for the pool
|
212 |
-
#if len(keep_elements) == 0:
|
213 |
if position >= len(full_pred['boxes']):
|
214 |
print("Problem with the index")
|
215 |
continue
|
@@ -219,7 +322,6 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
219 |
|
220 |
add_diagram_elements(bpmnplane, pool_id, min_x, min_y, pool_width, pool_height)
|
221 |
|
222 |
-
|
223 |
# Create BPMN elements for each pool
|
224 |
for idx, (pool_index, keep_elements) in enumerate(full_pred['pool_dict'].items()):
|
225 |
create_bpmn_object(process[idx], bpmnplane, text_mapping, definitions, size_elements, full_pred, keep_elements)
|
@@ -244,6 +346,7 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
244 |
reparsed = minidom.parseString(rough_string)
|
245 |
pretty_xml_as_string = reparsed.toprettyxml(indent=" ")
|
246 |
|
|
|
247 |
full_pred['boxes'] = rescale_boxes(1/scale, full_pred['boxes'])
|
248 |
full_pred['boxes'] = old_boxes
|
249 |
|
@@ -251,11 +354,22 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
|
|
251 |
|
252 |
# Function that creates a single pool with all elements
|
253 |
def create_big_pool(full_pred, text_mapping, size_elements, marge=50):
|
254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
new_pool_index = 'pool_1'
|
256 |
size_elements = get_size_elements(st.session_state.size_scale)
|
257 |
elements_pool = list(range(len(full_pred['boxes'])))
|
258 |
-
min_x, min_y, max_x, max_y = calculate_pool_bounds(full_pred['boxes'],full_pred['labels'], elements_pool, size_elements)
|
259 |
box = [min_x - marge, min_y - marge//2, max_x + marge, max_y + marge//2]
|
260 |
full_pred['boxes'] = np.append(full_pred['boxes'], [box], axis=0)
|
261 |
full_pred['pool_dict'][new_pool_index] = elements_pool
|
@@ -266,33 +380,61 @@ def create_big_pool(full_pred, text_mapping, size_elements, marge=50):
|
|
266 |
|
267 |
# Function that gives the size of the elements
|
268 |
def get_size_elements(size_scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
size_elements = {
|
270 |
-
'event': (size_scale*43.2, size_scale*43.2),
|
271 |
-
'task': (size_scale*120, size_scale*96),
|
272 |
-
'message': (size_scale*43.2, size_scale*43.2),
|
273 |
-
'messageEvent': (size_scale*43.2, size_scale*43.2),
|
274 |
-
'exclusiveGateway': (size_scale*60, size_scale*60),
|
275 |
-
'parallelGateway': (size_scale*60, size_scale*60),
|
276 |
-
'dataObject': (size_scale*48, size_scale*72),
|
277 |
-
'dataStore': (size_scale*72, size_scale*72),
|
278 |
-
'subProcess': (size_scale*144, size_scale*108),
|
279 |
-
'eventBasedGateway': (size_scale*60, size_scale*60),
|
280 |
-
'timerEvent': (size_scale*48, size_scale*48),
|
281 |
}
|
282 |
return size_elements
|
283 |
|
284 |
def rescale(scale, boxes):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
for i in range(len(boxes)):
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
return boxes
|
291 |
|
292 |
-
#Function to create the unique BPMN_id
|
293 |
-
def create_BPMN_id(labels,pool_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
-
|
|
|
|
|
|
|
296 |
|
297 |
data_counter = 1
|
298 |
|
@@ -336,7 +478,7 @@ def create_BPMN_id(labels,pool_dict):
|
|
336 |
else:
|
337 |
BPMN_id[idx] = f'{key}_{enums[key]}'
|
338 |
enums[key] += 1
|
339 |
-
|
340 |
# Update the pool_dict keys with their corresponding BPMN_id values
|
341 |
updated_pool_dict = {}
|
342 |
for key, value in pool_dict.items():
|
@@ -346,10 +488,18 @@ def create_BPMN_id(labels,pool_dict):
|
|
346 |
|
347 |
return BPMN_id, updated_pool_dict
|
348 |
|
349 |
-
|
350 |
-
|
351 |
def add_diagram_elements(parent, element_id, x, y, width, height):
|
352 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
shape = ET.SubElement(parent, 'bpmndi:BPMNShape', attrib={
|
354 |
'bpmnElement': element_id,
|
355 |
'id': element_id + '_di'
|
@@ -362,7 +512,14 @@ def add_diagram_elements(parent, element_id, x, y, width, height):
|
|
362 |
})
|
363 |
|
364 |
def add_diagram_edge(parent, element_id, waypoints):
|
365 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
edge = ET.SubElement(parent, 'bpmndi:BPMNEdge', attrib={
|
367 |
'bpmnElement': element_id,
|
368 |
'id': element_id + '_di'
|
@@ -375,8 +532,17 @@ def add_diagram_edge(parent, element_id, waypoints):
|
|
375 |
'y': str(y)
|
376 |
})
|
377 |
|
378 |
-
|
379 |
def check_status(link, keep_elements):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
if link[0] in keep_elements and link[1] in keep_elements:
|
381 |
return 'middle'
|
382 |
elif link[0] is None and link[1] in keep_elements:
|
@@ -385,40 +551,87 @@ def check_status(link, keep_elements):
|
|
385 |
return 'end'
|
386 |
else:
|
387 |
return 'middle'
|
388 |
-
|
389 |
def check_data_association(i, links, labels, keep_elements):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
390 |
status, links_idx = [], []
|
391 |
-
for j, (k,l) in enumerate(links):
|
392 |
if labels[j] == list(class_dict.values()).index('dataAssociation'):
|
393 |
-
if k==i:
|
394 |
status.append('output')
|
395 |
links_idx.append(j)
|
396 |
-
elif l==i:
|
397 |
status.append('input')
|
398 |
links_idx.append(j)
|
399 |
|
400 |
return status, links_idx
|
401 |
|
402 |
-
def create_data_Association(bpmn,data,size,element_id,current_idx,source_id,target_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
waypoints = calculate_waypoints(data, size, current_idx, source_id, target_id)
|
404 |
if waypoints is not None:
|
405 |
add_diagram_edge(bpmn, element_id, waypoints)
|
406 |
-
|
407 |
def check_eventBasedGateway(i, links, labels):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
408 |
status, links_idx = [], []
|
409 |
-
for j, (k,l) in enumerate(links):
|
410 |
if labels[j] == list(class_dict.values()).index('sequenceFlow'):
|
411 |
-
            if k==i:
                status.append('output')
                links_idx.append(j)
            elif l==i:
                status.append('input')
                links_idx.append(j)

    return status, links_idx
-
 # Function to dynamically create and layout BPMN elements
 def create_bpmn_object(process, bpmnplane, text_mapping, definitions, size, data, keep_elements):
     elements = data['BPMN_id']
     positions = data['boxes']
     links = data['links']
@@ -536,7 +749,6 @@ def create_bpmn_object(process, bpmnplane, text_mapping, definitions, size, data
                     sub_element = ET.SubElement(element, 'bpmn:eventBasedGateway', id=f'eventBasedGateway_{link_idx}_{gateway_name.split("_")[1]}')
                     create_data_Association(bpmnplane, data, size, sub_element.attrib['id'], i, element_id, gateway_name)
 
-
         add_diagram_elements(bpmnplane, element_id, x, y, size['eventBasedGateway'][0], size['eventBasedGateway'][1])
 
     # Data Object
@@ -558,6 +770,19 @@ def create_bpmn_object(process, bpmnplane, text_mapping, definitions, size, data
         add_diagram_elements(bpmnplane, element_id, x, y, size['timerEvent'][0], size['timerEvent'][1])
 
 def calculate_pool_bounds(boxes, labels, keep_elements, size=None, class_dict=None):
     min_x, min_y = float('inf'), float('inf')
     max_x, max_y = float('-inf'), float('-inf')
 
@@ -588,9 +813,22 @@ def calculate_pool_bounds(boxes, labels, keep_elements, size=None, class_dict=No
 
     return min_x, min_y, max_x, max_y
 
-
-
 def calculate_pool_waypoints(idx, data, size, source_idx, target_idx, source_element, target_element):
     # Get the bounding boxes of the source and target elements
     source_box = data['boxes'][source_idx]
     target_box = data['boxes'][target_idx]
@@ -625,11 +863,19 @@ def calculate_pool_waypoints(idx, data, size, source_idx, target_idx, source_ele
         waypoints = [(element_mid_x, element_box[3]), (element_mid_x, pool_box[1])]
 
     return waypoints
-
 def add_curve(waypoints, pos_source, pos_target, threshold=30):
     """
     Add a single curve to the sequence flow by introducing a control point.
     The control point is added at an offset from the midpoint of the original waypoints.
     """
     if len(waypoints) < 2:
         return waypoints
@@ -647,7 +893,7 @@ def add_curve(waypoints, pos_source, pos_target, threshold=30):
     if abs(start_x - end_x) < threshold or abs(start_y - end_y) < threshold:
         return waypoints
 
-    # Calculate the control point
     if pos_source in pos_horizontal and pos_target in pos_horizontal:
         control_point = None
     elif pos_source in pos_vertical and pos_target in pos_vertical:
@@ -658,7 +904,6 @@ def add_curve(waypoints, pos_source, pos_target, threshold=30):
         control_point = (start_x, end_y)
     else:
         control_point = None
-
 
     # Create the curved path
     if control_point is not None:
@@ -668,8 +913,20 @@ def add_curve(waypoints, pos_source, pos_target, threshold=30):
 
     return curved_waypoints
 
-
 def calculate_waypoints(data, size, current_idx, source_id, target_id):
     best_points = data['best_points'][current_idx]
     pos_source = best_points[0]
     pos_target = best_points[1]
@@ -684,7 +941,6 @@ def calculate_waypoints(data, size, current_idx, source_id, target_id):
     if source_idx is None or target_idx is None:
         warning()
         return None
-
 
     name_source = source_id.split('_')[0]
     name_target = target_id.split('_')[0]
@@ -702,6 +958,7 @@ def calculate_waypoints(data, size, current_idx, source_id, target_id):
         warning()
         return [(source_x, source_y), (target_x, target_y)]
 
     if pos_source == 'left':
         source_x = source_x
         source_y += size[name_source][1] / 2
@@ -715,6 +972,7 @@ def calculate_waypoints(data, size, current_idx, source_id, target_id):
         source_x += size[name_source][0] / 2
         source_y += size[name_source][1]
 
     if pos_target == 'left':
         target_x = target_x
         target_y += size[name_target][1] / 2
@@ -738,8 +996,19 @@ def calculate_waypoints(data, size, current_idx, source_id, target_id):
 
     return curved_waypoints
 
-
 def create_flow_element(bpmn, text_mapping, idx, size, data, parent, message=False):
     source_idx, target_idx = data['links'][idx]
 
     if source_idx is None or target_idx is None:
@@ -774,6 +1043,3 @@ def create_flow_element(bpmn, text_mapping, idx, size, data, parent, message=Fal
         return
     element = ET.SubElement(parent, 'bpmn:sequenceFlow', id=element_id, sourceRef=source_id, targetRef=target_id, name=text_mapping[data['BPMN_id'][idx]])
     add_diagram_edge(bpmn, element_id, waypoints)
-
-
-
 import numpy as np
 
 def find_position(pool_index, BPMN_id):
+    """
+    Find the position of the pool index in the BPMN_id list.
+
+    Args:
+        pool_index (str): The pool index to search for.
+        BPMN_id (list): List of BPMN IDs.
+
+    Returns:
+        int: The index of the pool_index in BPMN_id, or None if not found.
+    """
     if pool_index in BPMN_id:
         position = BPMN_id.index(pool_index)
     else:
 
 # Calculate the center of each bounding box and group them by pool
 def calculate_centers_and_group_by_pool(pred, class_dict):
+    """
+    Calculate the center coordinates of bounding boxes and group them by pool.
+
+    Args:
+        pred (dict): Dictionary containing prediction results, including 'pool_dict', 'boxes', and 'labels'.
+        class_dict (dict): Dictionary mapping class indices to class names.
+
+    Returns:
+        dict: Dictionary grouping centers and their indices by pool index.
+    """
     pool_groups = {}
     for pool_index, element_indices in pred['pool_dict'].items():
         pool_groups[pool_index] = []
 
                 continue
             if class_dict[pred['labels'][i]] not in ['dataObject', 'dataStore']:
                 x1, y1, x2, y2 = pred['boxes'][i]
+                center = [(x1 + x2) / 2, (y1 + y2) / 2]  # Compute the center of the bounding box
                 pool_groups[pool_index].append((center, i))
     return pool_groups
 
 # Group centers within a specified range
 def group_centers(centers, axis, range_=50):
+    """
+    Group centers based on a specified range along an axis.
+
+    Args:
+        centers (list): List of center coordinates and their indices.
+        axis (int): The axis (0 for x, 1 for y) to group centers along.
+        range_ (int): Maximum distance to consider centers as part of the same group.
+
+    Returns:
+        list: List of groups, where each group is a list of centers and indices.
+    """
     groups = []
     while centers:
         center, idx = centers.pop(0)
 
 # Align the elements within each pool
 def align_elements_within_pool(modified_pred, pool_groups, class_dict, size):
+    """
+    Align elements within each pool based on their centers.
+
+    Args:
+        modified_pred (dict): Dictionary containing the modified predictions.
+        pool_groups (dict): Dictionary grouping centers and their indices by pool index.
+        class_dict (dict): Dictionary mapping class indices to class names.
+        size (dict): Dictionary containing element sizes.
+    """
     for pool_index, centers in pool_groups.items():
+        # Align elements based on y-coordinates
         y_groups = group_centers(centers.copy(), axis=1)
         align_y_coordinates(modified_pred, y_groups, class_dict, size)
 
+        # Recalculate centers after y-alignment and then align based on x-coordinates
         centers = recalculate_centers(modified_pred, y_groups)
         x_groups = group_centers(centers.copy(), axis=0)
         align_x_coordinates(modified_pred, x_groups, class_dict, size)
 
 # Align the y-coordinates of the centers of grouped bounding boxes
 def align_y_coordinates(modified_pred, y_groups, class_dict, size):
+    """
+    Align the y-coordinates of elements in each group.
+
+    Args:
+        modified_pred (dict): Dictionary containing the modified predictions.
+        y_groups (list): List of groups of centers and their indices, grouped by y-coordinate.
+        class_dict (dict): Dictionary mapping class indices to class names.
+        size (dict): Dictionary containing element sizes.
+    """
     for group in y_groups:
+        avg_y = sum([c[0][1] for c in group]) / len(group)  # Compute the average y-coordinate
         for (center, idx) in group:
             label = class_dict[modified_pred['labels'][idx]]
             if label in size:
 
 # Recalculate centers after alignment
 def recalculate_centers(modified_pred, groups):
+    """
+    Recalculate the centers of bounding boxes after alignment.
+
+    Args:
+        modified_pred (dict): Dictionary containing the modified predictions.
+        groups (list): List of groups of centers and their indices.
+
+    Returns:
+        list: List of recalculated centers and their indices.
+    """
     centers = []
     for group in groups:
         for center, idx in group:
             x1, y1, x2, y2 = modified_pred['boxes'][idx]
+            center = [(x1 + x2) / 2, (y1 + y2) / 2]  # Recompute the center after alignment
             centers.append((center, idx))
     return centers
 
 # Align the x-coordinates of the centers of grouped bounding boxes
 def align_x_coordinates(modified_pred, x_groups, class_dict, size):
+    """
+    Align the x-coordinates of elements in each group.
+
+    Args:
+        modified_pred (dict): Dictionary containing the modified predictions.
+        x_groups (list): List of groups of centers and their indices, grouped by x-coordinate.
+        class_dict (dict): Dictionary mapping class indices to class names.
+        size (dict): Dictionary containing element sizes.
+    """
     for group in x_groups:
+        avg_x = sum([c[0][0] for c in group]) / len(group)  # Compute the average x-coordinate
         for (center, idx) in group:
             label = class_dict[modified_pred['labels'][idx]]
             if label in size:
 
 # Expand the pool bounding boxes to fit the aligned elements
 def expand_pool_bounding_boxes(modified_pred, size_elements):
+    """
+    Expand the bounding boxes of pools to fit aligned elements.
+
+    Args:
+        modified_pred (dict): Dictionary containing the modified predictions.
+        size_elements (dict): Dictionary containing element sizes.
+    """
     for idx, (pool_index, keep_elements) in enumerate(modified_pred['pool_dict'].items()):
         if len(keep_elements) != 0:
             marge = size_elements['task'][1] // 2
 
             error("The pool is maybe too small, please add more elements or increase the scale by zooming on the image.")
             continue
 
+        # Update the pool bounding box with margin
         modified_pred['boxes'][position] = [min_x - marge, min_y - marge//2, min_x + pool_width + marge, min_y + pool_height + marge//2]
 
 # Adjust left and right boundaries of all pools
 def adjust_pool_boundaries(modified_pred, pred):
+    """
+    Adjust the left and right boundaries of all pools to ensure they cover all elements.
+
+    Args:
+        modified_pred (dict): Dictionary containing the modified predictions.
+        pred (dict): Dictionary containing original prediction results.
+    """
     min_left, max_right = 0, 0
     for pool_index, element_indices in pred['pool_dict'].items():
         position = find_position(pool_index, modified_pred['BPMN_id'])
 
             x1 = min_left
         if x2 < max_right:
             x2 = max_right
+        # Update the pool bounding box with adjusted boundaries
         modified_pred['boxes'][position] = [x1, y1, x2, y2]
 
 # Main function to align boxes
 def align_boxes(pred, size, class_dict):
+    """
+    Main function to align bounding boxes for the given prediction data.
+
+    Args:
+        pred (dict): Dictionary containing prediction results.
+        size (dict): Dictionary containing element sizes.
+        class_dict (dict): Dictionary mapping class indices to class names.
+
+    Returns:
+        list: List of aligned bounding boxes.
+    """
     modified_pred = copy.deepcopy(pred)
     pool_groups = calculate_centers_and_group_by_pool(pred, class_dict)
     align_elements_within_pool(modified_pred, pool_groups, class_dict, size)
 
     return modified_pred['boxes']
 
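The grouping logic above is the heart of the auto-layout: centers that are close along one axis are collected into a group and snapped to a shared coordinate. A minimal, self-contained sketch of that idea follows; the helper name `group_and_snap` is hypothetical and is not the repository's API, it only illustrates the same grouping-then-averaging step.

def group_and_snap(centers, axis, range_=50):
    # Group 2D centers whose coordinate along `axis` lies within `range_` of a
    # seed center, then snap every member of the group to the group average.
    remaining = list(centers)
    snapped = []
    while remaining:
        seed = remaining.pop(0)
        group = [seed] + [c for c in remaining if abs(c[axis] - seed[axis]) <= range_]
        remaining = [c for c in remaining if c not in group]
        avg = sum(c[axis] for c in group) / len(group)
        for c in group:
            aligned = list(c)
            aligned[axis] = avg
            snapped.append(tuple(aligned))
    return snapped

print(group_and_snap([(100, 102), (300, 98), (500, 240)], axis=1))
# y=102 and y=98 fall in one group and are snapped to y=100.0; y=240 stays alone.
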
 # Function to create a BPMN XML file from prediction results
 def create_XML(full_pred, text_mapping, size_scale, scale):
+    """
+    Create a BPMN XML file from the prediction results.
+
+    Args:
+        full_pred (dict): Dictionary containing full prediction results.
+        text_mapping (dict): Dictionary mapping BPMN IDs to text labels.
+        size_scale (float): Scaling factor for element sizes.
+        scale (float): Scaling factor for bounding boxes.
+
+    Returns:
+        str: Pretty-printed BPMN XML string.
+    """
     namespaces = {
         'bpmn': 'http://www.omg.org/spec/BPMN/20100524/MODEL',
         'bpmndi': 'http://www.omg.org/spec/BPMN/20100524/DI',
 
         'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
     }
 
     definitions = ET.Element('bpmn:definitions', {
         'xmlns:xsi': namespaces['xsi'],
         'xmlns:bpmn': namespaces['bpmn'],
 
         'id': "simpleExample"
     })
 
     size_elements = get_size_elements(size_scale)
 
+    # If there is no pool or lane, create a pool with all elements
     if len(full_pred['pool_dict']) == 0 or (len(full_pred['pool_dict']) == 1 and len(next(iter(full_pred['pool_dict'].values()))) == len(full_pred['labels'])):
         full_pred, text_mapping = create_big_pool(full_pred, text_mapping, size_elements)
 
+    # Backup the original box positions
     old_boxes = copy.deepcopy(full_pred)
 
     # Create BPMN collaboration element
 
     # Create BPMN process elements
     process = []
+    for idx in range(len(full_pred['pool_dict'].items())):
+        process_id = f'process_{idx+1}'
         process.append(ET.SubElement(definitions, 'bpmn:process', id=process_id, isExecutable='false'))
 
     bpmndi = ET.SubElement(definitions, 'bpmndi:BPMNDiagram', id='BPMNDiagram_1')
     bpmnplane = ET.SubElement(bpmndi, 'bpmndi:BPMNPlane', id='BPMNPlane_1', bpmnElement='collaboration_1')
 
+    # Rescale and align bounding boxes
     full_pred['boxes'] = rescale_boxes(scale, old_boxes['boxes'])
     full_pred['boxes'] = align_boxes(full_pred, size_elements, class_dict)
 
     # Add diagram elements for each pool
     for idx, (pool_index, keep_elements) in enumerate(full_pred['pool_dict'].items()):
 
         pool = ET.SubElement(collaboration, 'bpmn:participant', id=pool_id, processRef=f'process_{idx+1}', name=text_mapping[pool_index])
 
         position = find_position(pool_index, full_pred['BPMN_id'])
         if position >= len(full_pred['boxes']):
             print("Problem with the index")
             continue
 
         add_diagram_elements(bpmnplane, pool_id, min_x, min_y, pool_width, pool_height)
 
     # Create BPMN elements for each pool
     for idx, (pool_index, keep_elements) in enumerate(full_pred['pool_dict'].items()):
         create_bpmn_object(process[idx], bpmnplane, text_mapping, definitions, size_elements, full_pred, keep_elements)
 
     reparsed = minidom.parseString(rough_string)
     pretty_xml_as_string = reparsed.toprettyxml(indent="  ")
 
+    # Restore the original box positions
     full_pred['boxes'] = rescale_boxes(1/scale, full_pred['boxes'])
     full_pred['boxes'] = old_boxes
 
 # Function that creates a single pool with all elements
 def create_big_pool(full_pred, text_mapping, size_elements, marge=50):
+    """
+    Create a single pool containing all elements if no pools or lanes are detected.
+
+    Args:
+        full_pred (dict): Dictionary containing full prediction results.
+        text_mapping (dict): Dictionary mapping BPMN IDs to text labels.
+        size_elements (dict): Dictionary containing element sizes.
+        marge (int, optional): Margin to add around the pool. Defaults to 50.
+
+    Returns:
+        tuple: Updated full_pred and text_mapping.
+    """
     new_pool_index = 'pool_1'
     size_elements = get_size_elements(st.session_state.size_scale)
     elements_pool = list(range(len(full_pred['boxes'])))
+    min_x, min_y, max_x, max_y = calculate_pool_bounds(full_pred['boxes'], full_pred['labels'], elements_pool, size_elements)
     box = [min_x - marge, min_y - marge//2, max_x + marge, max_y + marge//2]
     full_pred['boxes'] = np.append(full_pred['boxes'], [box], axis=0)
     full_pred['pool_dict'][new_pool_index] = elements_pool
 
 # Function that gives the size of the elements
 def get_size_elements(size_scale=1):
+    """
+    Get the sizes of BPMN elements based on the scaling factor.
+
+    Args:
+        size_scale (float, optional): Scaling factor for element sizes. Defaults to 1.
+
+    Returns:
+        dict: Dictionary containing element sizes.
+    """
     size_elements = {
+        'event': (size_scale * 43.2, size_scale * 43.2),
+        'task': (size_scale * 120, size_scale * 96),
+        'message': (size_scale * 43.2, size_scale * 43.2),
+        'messageEvent': (size_scale * 43.2, size_scale * 43.2),
+        'exclusiveGateway': (size_scale * 60, size_scale * 60),
+        'parallelGateway': (size_scale * 60, size_scale * 60),
+        'dataObject': (size_scale * 48, size_scale * 72),
+        'dataStore': (size_scale * 72, size_scale * 72),
+        'subProcess': (size_scale * 144, size_scale * 108),
+        'eventBasedGateway': (size_scale * 60, size_scale * 60),
+        'timerEvent': (size_scale * 48, size_scale * 48),
     }
     return size_elements
 
 def rescale(scale, boxes):
+    """
+    Rescale the bounding boxes by a given scaling factor.
+
+    Args:
+        scale (float): Scaling factor.
+        boxes (list): List of bounding boxes.
+
+    Returns:
+        list: Rescaled bounding boxes.
+    """
     for i in range(len(boxes)):
+        boxes[i] = [boxes[i][0] * scale,
+                    boxes[i][1] * scale,
+                    boxes[i][2] * scale,
+                    boxes[i][3] * scale]
     return boxes
 
+# Function to create the unique BPMN_id
+def create_BPMN_id(labels, pool_dict):
+    """
+    Create unique BPMN IDs for each element based on their labels.
+
+    Args:
+        labels (list): List of labels for each element.
+        pool_dict (dict): Dictionary containing pool indices and their elements.
+
+    Returns:
+        tuple: List of BPMN IDs and updated pool dictionary.
+    """
+    BPMN_id = [class_dict[labels[i]] for i in range(len(labels))]
 
     data_counter = 1
 
         else:
             BPMN_id[idx] = f'{key}_{enums[key]}'
             enums[key] += 1
+
     # Update the pool_dict keys with their corresponding BPMN_id values
     updated_pool_dict = {}
     for key, value in pool_dict.items():
 
     return BPMN_id, updated_pool_dict
 
 def add_diagram_elements(parent, element_id, x, y, width, height):
+    """
+    Utility to add BPMN diagram notation for elements.
+
+    Args:
+        parent (Element): The parent XML element.
+        element_id (str): The ID of the BPMN element.
+        x (float): The x-coordinate of the element.
+        y (float): The y-coordinate of the element.
+        width (float): The width of the element.
+        height (float): The height of the element.
+    """
     shape = ET.SubElement(parent, 'bpmndi:BPMNShape', attrib={
         'bpmnElement': element_id,
         'id': element_id + '_di'
 
     })
 
 def add_diagram_edge(parent, element_id, waypoints):
+    """
+    Utility to add BPMN diagram notation for sequence flows.
+
+    Args:
+        parent (Element): The parent XML element.
+        element_id (str): The ID of the BPMN element.
+        waypoints (list): List of waypoints for the sequence flow.
+    """
     edge = ET.SubElement(parent, 'bpmndi:BPMNEdge', attrib={
         'bpmnElement': element_id,
         'id': element_id + '_di'
 
         'y': str(y)
     })
 
 def check_status(link, keep_elements):
+    """
+    Check the status of a link in terms of its position within the elements.
+
+    Args:
+        link (tuple): A tuple representing the start and end of the link.
+        keep_elements (list): List of elements to keep.
+
+    Returns:
+        str: Status of the link ('middle', 'start', or 'end').
+    """
     if link[0] in keep_elements and link[1] in keep_elements:
         return 'middle'
     elif link[0] is None and link[1] in keep_elements:
 
         return 'end'
     else:
         return 'middle'
+
 def check_data_association(i, links, labels, keep_elements):
+    """
+    Check data associations for an element.
+
+    Args:
+        i (int): Index of the current element.
+        links (list): List of links between elements.
+        labels (list): List of labels for each element.
+        keep_elements (list): List of elements to keep.
+
+    Returns:
+        tuple: Status and indices of data associations.
+    """
     status, links_idx = [], []
+    for j, (k, l) in enumerate(links):
         if labels[j] == list(class_dict.values()).index('dataAssociation'):
+            if k == i:
                 status.append('output')
                 links_idx.append(j)
+            elif l == i:
                 status.append('input')
                 links_idx.append(j)
 
     return status, links_idx
 
+def create_data_Association(bpmn, data, size, element_id, current_idx, source_id, target_id):
+    """
+    Create a data association in the BPMN diagram.
+
+    Args:
+        bpmn (Element): The parent XML element.
+        data (dict): Dictionary containing prediction results.
+        size (dict): Dictionary containing element sizes.
+        element_id (str): The ID of the BPMN element.
+        current_idx (int): Index of the current element.
+        source_id (str): The source element ID.
+        target_id (str): The target element ID.
+    """
     waypoints = calculate_waypoints(data, size, current_idx, source_id, target_id)
     if waypoints is not None:
         add_diagram_edge(bpmn, element_id, waypoints)
+
 def check_eventBasedGateway(i, links, labels):
+    """
+    Check event-based gateway for an element.
+
+    Args:
+        i (int): Index of the current element.
+        links (list): List of links between elements.
+        labels (list): List of labels for each element.
+
+    Returns:
+        tuple: Status and indices of event-based gateway.
+    """
     status, links_idx = [], []
+    for j, (k, l) in enumerate(links):
         if labels[j] == list(class_dict.values()).index('sequenceFlow'):
+            if k == i:
                 status.append('output')
                 links_idx.append(j)
+            elif l == i:
                 status.append('input')
                 links_idx.append(j)
 
     return status, links_idx
+
 # Function to dynamically create and layout BPMN elements
 def create_bpmn_object(process, bpmnplane, text_mapping, definitions, size, data, keep_elements):
+    """
+    Dynamically create and layout BPMN elements.
+
+    Args:
+        process (Element): The BPMN process element.
+        bpmnplane (Element): The BPMN plane element.
+        text_mapping (dict): Dictionary mapping BPMN IDs to text labels.
+        definitions (Element): The BPMN definitions element.
+        size (dict): Dictionary containing element sizes.
+        data (dict): Dictionary containing prediction results.
+        keep_elements (list): List of elements to keep.
+    """
     elements = data['BPMN_id']
     positions = data['boxes']
     links = data['links']
 
                     sub_element = ET.SubElement(element, 'bpmn:eventBasedGateway', id=f'eventBasedGateway_{link_idx}_{gateway_name.split("_")[1]}')
                     create_data_Association(bpmnplane, data, size, sub_element.attrib['id'], i, element_id, gateway_name)
 
         add_diagram_elements(bpmnplane, element_id, x, y, size['eventBasedGateway'][0], size['eventBasedGateway'][1])
 
     # Data Object
 
         add_diagram_elements(bpmnplane, element_id, x, y, size['timerEvent'][0], size['timerEvent'][1])
 
 def calculate_pool_bounds(boxes, labels, keep_elements, size=None, class_dict=None):
+    """
+    Calculate the bounding box for a pool.
+
+    Args:
+        boxes (list): List of bounding boxes.
+        labels (list): List of labels for each element.
+        keep_elements (list): List of elements to keep.
+        size (dict, optional): Dictionary containing element sizes. Defaults to None.
+        class_dict (dict, optional): Dictionary mapping class indices to class names. Defaults to None.
+
+    Returns:
+        tuple: Minimum and maximum x and y coordinates of the pool.
+    """
     min_x, min_y = float('inf'), float('inf')
     max_x, max_y = float('-inf'), float('-inf')
 
     return min_x, min_y, max_x, max_y
 
 def calculate_pool_waypoints(idx, data, size, source_idx, target_idx, source_element, target_element):
+    """
+    Calculate waypoints for connecting elements within a pool.
+
+    Args:
+        idx (int): Index of the current element.
+        data (dict): Dictionary containing prediction results.
+        size (dict): Dictionary containing element sizes.
+        source_idx (int): Index of the source element.
+        target_idx (int): Index of the target element.
+        source_element (str): Source element type.
+        target_element (str): Target element type.
+
+    Returns:
+        list: List of waypoints for the connection.
+    """
     # Get the bounding boxes of the source and target elements
     source_box = data['boxes'][source_idx]
     target_box = data['boxes'][target_idx]
 
         waypoints = [(element_mid_x, element_box[3]), (element_mid_x, pool_box[1])]
 
     return waypoints
 def add_curve(waypoints, pos_source, pos_target, threshold=30):
     """
     Add a single curve to the sequence flow by introducing a control point.
     The control point is added at an offset from the midpoint of the original waypoints.
+
+    Args:
+        waypoints (list): List of waypoints representing the path.
+        pos_source (str): Position of the source element ('left', 'right', 'top', 'bottom').
+        pos_target (str): Position of the target element ('left', 'right', 'top', 'bottom').
+        threshold (int, optional): Minimum distance to consider for adding a curve. Defaults to 30.
+
+    Returns:
+        list: List of waypoints with the added control point if applicable.
     """
     if len(waypoints) < 2:
         return waypoints
 
     if abs(start_x - end_x) < threshold or abs(start_y - end_y) < threshold:
         return waypoints
 
+    # Calculate the control point based on source and target positions
     if pos_source in pos_horizontal and pos_target in pos_horizontal:
         control_point = None
     elif pos_source in pos_vertical and pos_target in pos_vertical:
 
         control_point = (start_x, end_y)
     else:
         control_point = None
 
     # Create the curved path
     if control_point is not None:
 
     return curved_waypoints
 
 def calculate_waypoints(data, size, current_idx, source_id, target_id):
+    """
+    Calculate waypoints for connecting two elements in the diagram.
+
+    Args:
+        data (dict): Data containing diagram information.
+        size (dict): Dictionary of element sizes.
+        current_idx (int): Index of the current element.
+        source_id (str): ID of the source element.
+        target_id (str): ID of the target element.
+
+    Returns:
+        list: List of waypoints for the connection.
+    """
     best_points = data['best_points'][current_idx]
     pos_source = best_points[0]
     pos_target = best_points[1]
 
     if source_idx is None or target_idx is None:
         warning()
         return None
 
     name_source = source_id.split('_')[0]
     name_target = target_id.split('_')[0]
 
         warning()
         return [(source_x, source_y), (target_x, target_y)]
 
+    # Adjust the source coordinates based on its position
     if pos_source == 'left':
         source_x = source_x
         source_y += size[name_source][1] / 2
 
         source_x += size[name_source][0] / 2
         source_y += size[name_source][1]
 
+    # Adjust the target coordinates based on its position
     if pos_target == 'left':
         target_x = target_x
         target_y += size[name_target][1] / 2
 
     return curved_waypoints
 
 def create_flow_element(bpmn, text_mapping, idx, size, data, parent, message=False):
+    """
+    Create a BPMN flow element (sequence flow or message flow) and add it to the BPMN diagram.
+
+    Args:
+        bpmn (ET.Element): The BPMN diagram element.
+        text_mapping (dict): Dictionary mapping element IDs to their text labels.
+        idx (int): Index of the current element.
+        size (dict): Dictionary of element sizes.
+        data (dict): Data containing diagram information.
+        parent (ET.Element): The parent element to which the flow element is added.
+        message (bool, optional): Whether the flow is a message flow. Defaults to False.
+    """
     source_idx, target_idx = data['links'][idx]
 
     if source_idx is None or target_idx is None:
 
         return
     element = ET.SubElement(parent, 'bpmn:sequenceFlow', id=element_id, sourceRef=source_id, targetRef=target_id, name=text_mapping[data['BPMN_id'][idx]])
     add_diagram_edge(bpmn, element_id, waypoints)
 
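The add_curve() docstring above describes bending an orthogonal connection by inserting a single control point at the corner, for example (end_x, start_y). A hedged sketch of how such a control point can be expanded into a smooth curve follows; quadratic_curve is an illustrative helper only, and the repository's own curve construction (the part elided in the hunk) is not reproduced here.

def quadratic_curve(start, control, end, steps=8):
    # Sample a quadratic Bezier curve defined by start, control, and end points.
    points = []
    for i in range(steps + 1):
        t = i / steps
        x = (1 - t) ** 2 * start[0] + 2 * (1 - t) * t * control[0] + t ** 2 * end[0]
        y = (1 - t) ** 2 * start[1] + 2 * (1 - t) * t * control[1] + t ** 2 * end[1]
        points.append((round(x, 1), round(y, 1)))
    return points

waypoints = quadratic_curve((0, 0), (100, 0), (100, 80))
print(waypoints[0], waypoints[len(waypoints) // 2], waypoints[-1])
# (0.0, 0.0) at the source, a rounded corner such as (75.0, 20.0) in the middle, (100.0, 80.0) at the target.
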
modules/train.py
CHANGED
@@ -15,8 +15,6 @@ from tqdm import tqdm
 from modules.utils import write_results
 
 
-
-
 def get_arrow_model(num_classes, num_keypoints=2):
     """
     Configures and returns a modified Keypoint R-CNN model based on ResNet-50 with FPN, adapted for a custom number of classes and keypoints.
@@ -27,14 +25,6 @@ def get_arrow_model(num_classes, num_keypoints=2):
 
     Returns:
     - model (torch.nn.Module): The modified Keypoint R-CNN model.
-
-    Steps:
-    1. Load a pre-trained Keypoint R-CNN model with a ResNet-50 backbone and Feature Pyramid Network (FPN).
-       The model is initially configured for the COCO dataset, which includes various object classes and keypoints.
-    2. Replace the box predictor to adjust the number of output classes. The box predictor is responsible for
-       classifying detected regions and predicting their bounding boxes.
-    3. Replace the keypoint predictor to adjust the number of keypoints the model predicts for each object.
-       This is necessary to tailor the model to specific tasks that may have different keypoint structures.
     """
     # Load a model pre-trained on COCO, initialized without pre-trained weights
     model = keypointrcnn_resnet50_fpn(weights=None)
@@ -72,44 +62,60 @@ def get_faster_rcnn_model(num_classes):
 
     return model
 
-def prepare_model(dict,opti,learning_rate=
-
-
-        model = get_faster_rcnn_model(len(dict))
-    elif model_type == 'arrow':
-        model = get_arrow_model(len(dict),2)
 
-
-
-    #
-
-        optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.00056, eps=1e-08, betas=(0.9, 0.999))
-    else:
-        print('Optimizer not found')
 
-
-from torch.optim import AdamW
-import time
-from modules.train import write_results
 
-import torch
-import numpy as np
-from tqdm import tqdm
 
 def evaluate_loss(model, data_loader, device, loss_config=None, print_losses=False):
     model.train()  # Set the model to evaluation mode
     total_loss = 0
 
@@ -174,12 +180,12 @@ def evaluate_loss(model, data_loader, device, loss_config=None, print_losses=Fal
     avg_loss_keypoints = np.mean(loss_keypoints_list)
 
     if print_losses:
-
-
-
-
-
-
 
     return avg_loss
 
@@ -188,206 +194,225 @@ def training_model(num_epochs, model, data_loader, subset_test_loader,
                    optimizer, model_to_load=None, change_learning_rate=100, start_key=100,
                    parameters=None, blur_prob=0.02,
                    score_threshold=0.7, iou_threshold=0.5, early_stop_f1_score=0.97,
-                   information_training='training', start_epoch=0, loss_config=None, model_type
                    eval_metric='f1_score', device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')):
 
-            targets = [{k: v.clone().detach().to(device) for k, v in t.items()} for t in targets_im]
-
-            optimizer.zero_grad()
-
-            loss_dict = model(images, targets)
-            # Inside the training loop where losses are calculated:
-            losses = 0
-            if loss_config is not None:
-                for key, loss in loss_dict.items():
-                    if loss_config.get(key, False):
-                        if key == 'loss_classifier':
-                            loss *= 3
-                        losses += loss
-            else:
-                losses = sum(loss for key, loss in loss_dict.items())
-
-            # Collect individual losses
-            if loss_dict['loss_classifier']:
-                loss_classifier_list.append(loss_dict['loss_classifier'].item())
-            else:
-                loss_classifier_list.append(0)
-
-            if loss_dict['loss_box_reg']:
-                loss_box_reg_list.append(loss_dict['loss_box_reg'].item())
-            else:
-                loss_box_reg_list.append(0)
-
-            if loss_dict['loss_objectness']:
-                loss_objectness_list.append(loss_dict['loss_objectness'].item())
-            else:
-                loss_objectness_list.append(0)
-
-            if loss_dict['loss_rpn_box_reg']:
-                loss_rpn_box_reg_list.append(loss_dict['loss_rpn_box_reg'].item())
-            else:
-                loss_rpn_box_reg_list.append(0)
-
-            if 'loss_keypoint' in loss_dict:
-                loss_keypoints_list.append(loss_dict['loss_keypoint'].item())
-            else:
-                loss_keypoints_list.append(0)
-
-            losses.backward()
-            optimizer.step()
-
-            total_loss += losses.item()
-
-            # Update the description with the current loss
-            progress_bar.set_description(f'Epoch {epoch+1+start_epoch}, Loss: {losses.item():.4f}')
-
-        # Calculate average loss
-        avg_loss = total_loss / len(data_loader)
-
-        epoch_avg_losses.append(avg_loss)
-        epoch_avg_loss_classifier.append(np.mean(loss_classifier_list))
-        epoch_avg_loss_box_reg.append(np.mean(loss_box_reg_list))
-        epoch_avg_loss_objectness.append(np.mean(loss_objectness_list))
-        epoch_avg_loss_rpn_box_reg.append(np.mean(loss_rpn_box_reg_list))
-        epoch_avg_loss_keypoints.append(np.mean(loss_keypoints_list))
 
-            best_model_state = copy.deepcopy(model.state_dict())
-
-        if epoch>0 and f1_score>early_stop_f1_score:
-            same+=1
-
-        epoch_precision.append(precision)
-        epoch_recall.append(recall)
-        epoch_f1_score.append(f1_score)
-        epoch_test_loss.append(avg_test_loss)
-
-        name_model = f"model_{type(optimizer).__name__}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}"
-        metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
-
-        if same >=1 :
-            torch.save(best_model_state, './models/'+ name_model +'.pth')
-            write_results(name_model,metrics_list,start_epoch)
-            break
-
-        if (epoch+1+start_epoch) % 5 == 0:
-            torch.save(best_model_state, './models/'+ name_model +'.pth')
-            model.load_state_dict(best_model_state)
-            write_results(name_model,metrics_list,start_epoch)
-
-
-
 from modules.utils import write_results
 
 
 def get_arrow_model(num_classes, num_keypoints=2):
     """
     Configures and returns a modified Keypoint R-CNN model based on ResNet-50 with FPN, adapted for a custom number of classes and keypoints.
 
     Returns:
     - model (torch.nn.Module): The modified Keypoint R-CNN model.
     """
     # Load a model pre-trained on COCO, initialized without pre-trained weights
     model = keypointrcnn_resnet50_fpn(weights=None)
 
     return model
 
+def prepare_model(dict, opti, learning_rate=0.0003, model_to_load=None, model_type='object'):
+    """
+    Prepares the model and optimizer for training.
+
+    Parameters:
+    - dict (dict): Dictionary of classes.
+    - opti (str): Optimizer type ('SGD' or 'Adam').
+    - learning_rate (float): Learning rate for the optimizer.
+    - model_to_load (str, optional): Name of the model to load.
+    - model_type (str): Type of model to prepare ('object' or 'arrow').
+
+    Returns:
+    - model (torch.nn.Module): The prepared model.
+    - optimizer (torch.optim.Optimizer): The configured optimizer.
+    - device (torch.device): The device (CPU or CUDA) on which to perform training.
+    """
+    # Adjusted to pass the class_dict directly
+    if model_type == 'object':
+        model = get_faster_rcnn_model(len(dict))
+    elif model_type == 'arrow':
+        model = get_arrow_model(len(dict), 2)
 
+    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    # Load the model weights
+    if model_to_load:
+        model.load_state_dict(torch.load('./models/' + model_to_load + '.pth', map_location=device))
+        print(f"Model '{model_to_load}' loaded")
 
+    model.to(device)
 
+    if opti == 'SGD':
+        optimizer = SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0001)
+    elif opti == 'Adam':
+        optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.00056, eps=1e-08, betas=(0.9, 0.999))
+    else:
+        print('Optimizer not found')
 
+    return model, optimizer, device
 
 
 def evaluate_loss(model, data_loader, device, loss_config=None, print_losses=False):
+    """
+    Evaluate the loss of the model on a validation dataset.
+
+    Parameters:
+    - model (torch.nn.Module): The model to evaluate.
+    - data_loader (torch.utils.data.DataLoader): DataLoader for the validation dataset.
+    - device (torch.device): Device to perform evaluation on.
+    - loss_config (dict, optional): Configuration specifying which losses to use.
+    - print_losses (bool): Whether to print individual loss components.
+
+    Returns:
+    - float: Average loss over the validation dataset.
+    """
     model.train()  # Set the model to evaluation mode
     total_loss = 0
 
     avg_loss_keypoints = np.mean(loss_keypoints_list)
 
     if print_losses:
+        print(f"Average Loss: {avg_loss:.4f}")
+        print(f"Average Classifier Loss: {avg_loss_classifier:.4f}")
+        print(f"Average Box Regression Loss: {avg_loss_box_reg:.4f}")
+        print(f"Average Objectness Loss: {avg_loss_objectness:.4f}")
+        print(f"Average RPN Box Regression Loss: {avg_loss_rpn_box_reg:.4f}")
+        print(f"Average Keypoints Loss: {avg_loss_keypoints:.4f}")
 
     return avg_loss
 
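For orientation, a hedged usage sketch of the prepare_model() helper shown above; the class dictionary below is a toy subset and the import path simply follows the module layout of this diff, it is not a verified snippet from the repository.

from modules.train import prepare_model  # assumed import path

object_classes = {0: 'background', 1: 'task', 2: 'exclusiveGateway'}  # toy subset of the real class dict
model, optimizer, device = prepare_model(object_classes, 'Adam',
                                         learning_rate=0.0003,
                                         model_to_load=None,  # or the name of a saved './models/<name>.pth'
                                         model_type='object')
# 'Adam' selects the AdamW optimizer branch; 'SGD' selects plain SGD with momentum.
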
                    optimizer, model_to_load=None, change_learning_rate=100, start_key=100,
                    parameters=None, blur_prob=0.02,
                    score_threshold=0.7, iou_threshold=0.5, early_stop_f1_score=0.97,
+                   information_training='training', start_epoch=0, loss_config=None, model_type='object',
                    eval_metric='f1_score', device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')):
+    """
+    Train the model over a specified number of epochs.
+
+    Parameters:
+    - num_epochs (int): Number of epochs to train for.
+    - model (torch.nn.Module): Model to train.
+    - data_loader (torch.utils.data.DataLoader): DataLoader for the training dataset.
+    - subset_test_loader (torch.utils.data.DataLoader): DataLoader for the validation dataset.
+    - optimizer (torch.optim.Optimizer): Optimizer to use for training.
+    - model_to_load (str, optional): Name of the model to load.
+    - change_learning_rate (int): Epoch interval to change the learning rate.
+    - start_key (int): Epoch to start training keypoints.
+    - parameters (dict, optional): Additional training parameters.
+    - blur_prob (float): Probability of applying blur augmentation.
+    - score_threshold (float): Score threshold for evaluation.
+    - iou_threshold (float): IoU threshold for evaluation.
+    - early_stop_f1_score (float): F1 score threshold for early stopping.
+    - information_training (str): Information about the training.
+    - start_epoch (int): Starting epoch number.
+    - loss_config (dict, optional): Configuration specifying which losses to use.
+    - model_type (str): Type of model ('object' or 'arrow').
+    - eval_metric (str): Evaluation metric ('f1_score', 'precision', 'recall', or 'loss').
+    - device (torch.device): Device to perform training on.
+
+    Returns:
+    - model (torch.nn.Module): Trained model.
+    """
+    model.train()
+
+    if loss_config is None:
+        print('No loss config found, all losses will be used.')
+    else:
+        # Print the list of the losses that will be used
+        print('The following losses will be used: ', end='')
+        for key, value in loss_config.items():
+            if value:
+                print(key, end=", ")
+        print()
+
+    # Initialize lists to store epoch-wise average losses
+    epoch_avg_losses = []
+    epoch_avg_loss_classifier = []
+    epoch_avg_loss_box_reg = []
+    epoch_avg_loss_objectness = []
+    epoch_avg_loss_rpn_box_reg = []
+    epoch_avg_loss_keypoints = []
+    epoch_precision = []
+    epoch_recall = []
+    epoch_f1_score = []
+    epoch_test_loss = []
+
+    start_tot = time.time()
+    best_metrics = -1000
+    best_epoch = 0
+    best_model_state = None
+    same = 0
+    learning_rate = optimizer.param_groups[0]['lr']
+    bad_test_loss = 0
+    previous_test_loss = 1000
+
+    if parameters is not None:
+        batch_size, crop_prob, rotate_90_proba, h_flip_prob, v_flip_prob, max_rotate_deg, rotate_proba, keep_ratio = parameters.values()
+
+    print(f"Let's go training {model_type} model with {num_epochs} epochs!")
+    if parameters is not None:
+        print(f"Learning rate: {learning_rate}, Batch size: {batch_size}, Crop prob: {crop_prob}, H flip prob: {h_flip_prob}, V flip prob: {v_flip_prob}, Max rotate deg: {max_rotate_deg}, Rotate proba: {rotate_proba}, Rotate 90 proba: {rotate_90_proba}, Keep ratio: {keep_ratio}")
+
+    for epoch in range(num_epochs):
+        if (epoch > 0 and (epoch) % change_learning_rate == 0) or bad_test_loss >= 3:
+            learning_rate = 0.7 * learning_rate
+            optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=learning_rate, eps=1e-08, betas=(0.9, 0.999))
+            if best_model_state is not None:
+                model.load_state_dict(best_model_state)
+            print(f'Learning rate changed to {learning_rate:.4} and the best epoch for now is {best_epoch}')
+            bad_test_loss = 0
+        if epoch > 0 and (epoch) == start_key:
+            print("Now it's training Keypoints also")
+            loss_config['loss_keypoint'] = True
+            for name, param in model.named_parameters():
+                if 'keypoint' in name:
+                    param.requires_grad = True
+
+        model.train()
+        start = time.time()
+        total_loss = 0
+
+        # Initialize lists to keep track of individual losses
+        loss_classifier_list = []
+        loss_box_reg_list = []
+        loss_objectness_list = []
+        loss_rpn_box_reg_list = []
+        loss_keypoints_list = []
+
+        # Create a tqdm progress bar
+        progress_bar = tqdm(data_loader, desc=f'Epoch {epoch + 1 + start_epoch}')
+
+        for images, targets_im in progress_bar:
+            images = [image.to(device) for image in images]
+            targets = [{k: v.clone().detach().to(device) for k, v in t.items()} for t in targets_im]
 
+            optimizer.zero_grad()
 
+            loss_dict = model(images, targets)
+            # Inside the training loop where losses are calculated:
+            losses = 0
+            if loss_config is not None:
+                for key, loss in loss_dict.items():
+                    if loss_config.get(key, False):
+                        if key == 'loss_classifier':
+                            loss *= 3
+                        losses += loss
+            else:
+                losses = sum(loss for key, loss in loss_dict.items())
+
+            # Collect individual losses
+            if loss_dict['loss_classifier']:
+                loss_classifier_list.append(loss_dict['loss_classifier'].item())
+            else:
+                loss_classifier_list.append(0)
+
+            if loss_dict['loss_box_reg']:
+                loss_box_reg_list.append(loss_dict['loss_box_reg'].item())
+            else:
+                loss_box_reg_list.append(0)
+
+            if loss_dict['loss_objectness']:
+                loss_objectness_list.append(loss_dict['loss_objectness'].item())
+            else:
+                loss_objectness_list.append(0)
 
+            if loss_dict['loss_rpn_box_reg']:
+                loss_rpn_box_reg_list.append(loss_dict['loss_rpn_box_reg'].item())
+            else:
+                loss_rpn_box_reg_list.append(0)
+
+            if 'loss_keypoint' in loss_dict:
+                loss_keypoints_list.append(loss_dict['loss_keypoint'].item())
+            else:
+                loss_keypoints_list.append(0)
 
+            losses.backward()
+            optimizer.step()
 
+            total_loss += losses.item()
+
+            # Update the description with the current loss
+            progress_bar.set_description(f'Epoch {epoch + 1 + start_epoch}, Loss: {losses.item():.4f}')
+
+        # Calculate average loss
+        avg_loss = total_loss / len(data_loader)
+
+        epoch_avg_losses.append(avg_loss)
+        epoch_avg_loss_classifier.append(np.mean(loss_classifier_list))
+        epoch_avg_loss_box_reg.append(np.mean(loss_box_reg_list))
+        epoch_avg_loss_objectness.append(np.mean(loss_objectness_list))
+        epoch_avg_loss_rpn_box_reg.append(np.mean(loss_rpn_box_reg_list))
+        epoch_avg_loss_keypoints.append(np.mean(loss_keypoints_list))
 
+        # Evaluate the model on the test set
+        if eval_metric == 'loss':
+            labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = 0, 0, 0, 0, 0, 0
+            avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
+            print(f"Epoch {epoch + 1 + start_epoch}, Average Training Loss: {avg_loss:.4f}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
+        else:
+            avg_test_loss = 0
+            labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = main_evaluation(model, subset_test_loader, score_threshold=0.5, iou_threshold=0.5, distance_threshold=10, key_correction=False, model_type=model_type)
+            print(f"Epoch {epoch + 1 + start_epoch}, Average Loss: {avg_loss:.4f}, Labels_precision: {labels_precision:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_score:.4f} ", end=", ")
+            avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
+            print(f"Epoch {epoch + 1 + start_epoch}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
+
+        print(f"Time: {time.time() - start:.2f} [s]")
+
+        if eval_metric == 'f1_score':
+            metric_used = f1_score
+        elif eval_metric == 'precision':
+            metric_used = precision
+        elif eval_metric == 'recall':
+            metric_used = recall
+        else:
+            metric_used = -avg_test_loss
+
+        # Check if this epoch's model has the lowest average loss
+        if metric_used > best_metrics:
+            best_metrics = metric_used
+            best_epoch = epoch + 1 + start_epoch
+            best_model_state = copy.deepcopy(model.state_dict())
+
+        if epoch > 0 and f1_score > early_stop_f1_score:
+            same += 1
+
+        epoch_precision.append(precision)
+        epoch_recall.append(recall)
+        epoch_f1_score.append(f1_score)
+        epoch_test_loss.append(avg_test_loss)
+
+        name_model = f"model_{type(optimizer).__name__}_{epoch + 1 + start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob * 10)}_crop0{int(crop_prob * 10)}_flip0{int(h_flip_prob * 10)}_rotate0{int(rotate_proba * 10)}_{information_training}"
+        metrics_list = [epoch_avg_losses, epoch_avg_loss_classifier, epoch_avg_loss_box_reg, epoch_avg_loss_objectness, epoch_avg_loss_rpn_box_reg, epoch_avg_loss_keypoints, epoch_precision, epoch_recall, epoch_f1_score, epoch_test_loss]
+
+        if same >= 1:
+            torch.save(best_model_state, './models/' + name_model + '.pth')
+            write_results(name_model, metrics_list, start_epoch)
+            break
+
+        if (epoch + 1 + start_epoch) % 5 == 0:
+            torch.save(best_model_state, './models/' + name_model + '.pth')
+            model.load_state_dict(best_model_state)
+            write_results(name_model, metrics_list, start_epoch)
+
+        if avg_test_loss > previous_test_loss:
+            bad_test_loss += 1
+        previous_test_loss = avg_test_loss
+
+    print(f"\n Total time: {(time.time() - start_tot) / 60} minutes, Best Epoch is {best_epoch} with an {eval_metric} of {best_metrics:.4f}")
+    if best_model_state:
+        torch.save(best_model_state, './models/' + name_model + '.pth')
+        model.load_state_dict(best_model_state)
+        write_results(name_model, metrics_list, start_epoch)
+    print(f"Name of the best model: model_{type(optimizer).__name__}_{epoch + 1 + start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob * 10)}_crop0{int(crop_prob * 10)}_flip0{int(h_flip_prob * 10)}_rotate0{int(rotate_proba * 10)}_{information_training}")
+
+    return model
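A hedged end-to-end sketch of how prepare_model() and training_model() could be wired together, matching the parameter unpacking order used in the docstring above. The data loaders are placeholders you must supply yourself, and the keys of `parameters` follow the unpacking order shown in the code, so this is an illustration rather than a verified script from the repository.

from modules.train import prepare_model, training_model
from modules.utils import class_dict  # assumed to be the full class mapping from modules/utils.py

model, optimizer, device = prepare_model(class_dict, 'Adam', model_type='object')

parameters = {  # order matters: it is unpacked positionally via parameters.values()
    'batch_size': 4, 'crop_prob': 0.2, 'rotate_90_proba': 0.1,
    'h_flip_prob': 0.3, 'v_flip_prob': 0.0, 'max_rotate_deg': 10,
    'rotate_proba': 0.2, 'keep_ratio': True,
}
loss_config = {'loss_classifier': True, 'loss_box_reg': True,
               'loss_objectness': True, 'loss_rpn_box_reg': True,
               'loss_keypoint': False}

# train_loader and val_loader are assumed torch DataLoaders of (image, target) pairs.
model = training_model(50, model, train_loader, val_loader, optimizer,
                       parameters=parameters, loss_config=loss_config,
                       eval_metric='f1_score', device=device)
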
modules/utils.py
CHANGED
@@ -1,59 +1,11 @@
-from torchvision.models.detection import keypointrcnn_resnet50_fpn
-from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
-from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor
-from torchvision.models.detection import KeypointRCNN_ResNet50_FPN_Weights
-import random
 import torch
-from torch.utils.data import Dataset
 import torchvision.transforms.functional as F
 import numpy as np
-from torch.utils.data.dataloader import default_collate
 import cv2
 import matplotlib.pyplot as plt
-from torch.utils.data import DataLoader, Subset, ConcatDataset
 import streamlit as st
 
-
-"""object_dict = {
-    0: 'background',
-    1: 'task',
-    2: 'exclusiveGateway',
-    3: 'eventBasedGateway',
-    4: 'event',
-    5: 'messageEvent',
-    6: 'timerEvent',
-    7: 'dataObject',
-    8: 'dataStore',
-    9: 'pool',
-    10: 'lane',
-}
-
-
-arrow_dict = {
-    0: 'background',
-    1: 'sequenceFlow',
-    2: 'dataAssociation',
-    3: 'messageFlow',
-}
-
-class_dict = {
-    0: 'background',
-    1: 'task',
-    2: 'exclusiveGateway',
-    3: 'eventBasedGateway',
-    4: 'event',
-    5: 'messageEvent',
-    6: 'timerEvent',
-    7: 'dataObject',
-    8: 'dataStore',
-    9: 'pool',
-    10: 'lane',
-    11: 'sequenceFlow',
-    12: 'dataAssociation',
-    13: 'messageFlow',
-}"""
-
-
 object_dict = {
     0: 'background',
     1: 'task',
@@ -96,7 +48,6 @@ class_dict = {
     15: 'messageFlow',
 }
 
-
 def is_inside(box1, box2):
     """Check if the center of box1 is inside box2."""
     x_center = (box1[0] + box1[2]) / 2
@@ -107,51 +58,31 @@ def is_vertical(box):
     """Determine if the text in the bounding box is vertically aligned."""
     width = box[2] - box[0]
     height = box[3] - box[1]
-    return (height > 2*width)
 
 def rescale_boxes(scale, boxes):
     for i in range(len(boxes)):
-
-                    boxes[i][1]*scale,
-                    boxes[i][2]*scale,
-                    boxes[i][3]*scale]
     return boxes
 
 def iou(box1, box2):
-
     inter_box = [max(box1[0], box2[0]), max(box1[1], box2[1]), min(box1[2], box2[2]), min(box1[3], box2[3])]
     inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
-
-    # Compute the union of the two bounding boxes
     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
     box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
     union_area = box1_area + box2_area - inter_area
-
     return inter_area / union_area
 
 def proportion_inside(box1, box2):
-
     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
     box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
-
-    # Determine the bigger and smaller boxes
-    if box1_area > box2_area:
-        big_box = box1
-        small_box = box2
-    else:
-        big_box = box2
-        small_box = box1
-
-    # Calculate the intersection of the two bounding boxes
     inter_box = [max(small_box[0], big_box[0]), max(small_box[1], big_box[1]), min(small_box[2], big_box[2]), min(small_box[3], big_box[3])]
     inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
-
-    # Calculate the proportion of the smaller box inside the bigger box
-    if (small_box[2] - small_box[0]) * (small_box[3] - small_box[1]) == 0:
-        return 0
     proportion = inter_area / ((small_box[2] - small_box[0]) * (small_box[3] - small_box[1]))
-
-    # Ensure the proportion is at most 100%
     return min(proportion, 1.0)
 
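A standalone numeric check of the overlap measure used by iou() above (a sketch with a hypothetical helper name, not an import of the repository's module): two 10x10 boxes offset by 5 pixels overlap in a 5x5 square, so the IoU is 25 / (100 + 100 - 25), roughly 0.143.

def iou_boxes(box1, box2):
    # Intersection-over-union of two axis-aligned boxes given as [x1, y1, x2, y2].
    ix1, iy1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix2, iy2 = min(box1[2], box2[2]), min(box1[3], box2[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return inter / (area1 + area2 - inter)

print(round(iou_boxes([0, 0, 10, 10], [5, 5, 15, 15]), 3))  # 0.143
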
157 |
def resize_boxes(boxes, original_size, target_size):
|
@@ -168,20 +99,15 @@ def resize_boxes(boxes, original_size, target_size):
168 |       """
169 |       orig_width, orig_height = original_size
170 |       target_width, target_height = target_size
171 | -
172 | -     # Calculate the ratios for width and height
173 |       width_ratio = target_width / orig_width
174 |       height_ratio = target_height / orig_height
175 | -
176 | -     # Apply the ratios to the bounding boxes
177 |       boxes[:, 0] *= width_ratio
178 |       boxes[:, 1] *= height_ratio
179 |       boxes[:, 2] *= width_ratio
180 |       boxes[:, 3] *= height_ratio
181 | -
182 |       return boxes
183 |
184 | - def resize_keypoints(keypoints: np.ndarray, original_size: tuple, target_size: tuple):
185 |       """
186 |       Resize keypoints based on the original and target dimensions of an image.
187 |
@@ -192,40 +118,38 @@ def resize_keypoints(keypoints: np.ndarray, original_size: tuple, target_size: t
192 |
193 |       Returns:
194 |       - np.ndarray: The resized keypoints.
195 | -
196 | -     Explanation:
197 | -     The function calculates the ratio of the target dimensions to the original dimensions.
198 | -     It then applies these ratios to the x and y coordinates of each keypoint to scale them
199 | -     appropriately to the target image size.
200 |       """
201 | -
202 |       orig_width, orig_height = original_size
203 |       target_width, target_height = target_size
204 | -
205 | -     # Calculate the ratios for width and height scaling
206 |       width_ratio = target_width / orig_width
207 |       height_ratio = target_height / orig_height
208 | -
209 | -
210 | -     keypoints[:, 0] *= width_ratio  # Scale x coordinates
211 | -     keypoints[:, 1] *= height_ratio  # Scale y coordinates
212 | -
213 |       return keypoints
214 |
215 | -
216 | -
217 | -
218 |       for i in range(len(metrics_list[0])):
219 | -
220 | -
221 |
222 |   def find_other_keypoint(idx, keypoints, boxes):
223 |       box = boxes[idx]
224 | -     key1,key2 = keypoints[idx]
225 |       x1, y1, x2, y2 = box
226 |       center = ((x1 + x2) // 2, (y1 + y2) // 2)
227 |       average_keypoint = (key1 + key2) // 2
228 | -     #find the opposite keypoint to the center
229 |       if average_keypoint[0] < center[0]:
230 |           x = center[0] + abs(center[0] - average_keypoint[0])
231 |       else:
@@ -235,7 +159,6 @@ def find_other_keypoint(idx, keypoints, boxes):
235 |       else:
236 |           y = center[1] - abs(center[1] - average_keypoint[1])
237 |       return x, y, average_keypoint[0], average_keypoint[1]
238 | -
239 |
240 |   def filter_overlap_boxes(boxes, scores, labels, keypoints, iou_threshold=0.5):
241 |       """
@@ -251,47 +174,28 @@ def filter_overlap_boxes(boxes, scores, labels, keypoints, iou_threshold=0.5):
251 |       Returns:
252 |       - tuple: Filtered boxes, scores, labels, and keypoints.
253 |       """
254 | -     # Calculate the area of each bounding box to use in IoU calculation.
255 |       areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
256 | -
257 | -     # Sort the indices of the boxes based on their scores in descending order.
258 |       order = scores.argsort()[::-1]
259 | -
260 | -     keep = []  # List to store indices of boxes to keep.
261 | -
262 |       while order.size > 0:
263 | -         # Take the first index (highest score) from the sorted list.
264 |           i = order[0]
265 | -         keep.append(i)
266 | -
267 | -         # Compute the coordinates of the intersection rectangle.
268 |           xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
269 |           yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
270 |           xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
271 |           yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
272 | -
273 | -         # Compute the area of the intersection rectangle.
274 |           w = np.maximum(0.0, xx2 - xx1)
275 |           h = np.maximum(0.0, yy2 - yy1)
276 |           inter = w * h
277 | -
278 | -         # Calculate IoU and find boxes with IoU less than the threshold to keep.
279 |           iou = inter / (areas[i] + areas[order[1:]] - inter)
280 |           inds = np.where(iou <= iou_threshold)[0]
281 | -
282 | -         # Update the list of box indices to consider in the next iteration.
283 | -         order = order[inds + 1]  # Skip the first element since it's already included in 'keep'.
284 | -
285 | -     # Use the indices in 'keep' to select the boxes, scores, labels, and keypoints to return.
286 |       boxes = boxes[keep]
287 |       scores = scores[keep]
288 |       labels = labels[keep]
289 |       keypoints = keypoints[keep]
290 | -
291 |       return boxes, scores, labels, keypoints
292 |
293 | -
294 | -
295 |   def draw_annotations(image,
296 |                        target=None,
297 |                        prediction=None,
@@ -312,7 +216,7 @@ def draw_annotations(image,
312 |                        only_print=None,
313 |                        axis=False,
314 |                        return_image=False,
315 | -                      new_size=(1333,800),
316 |                        resize=False):
317 |       """
318 |       Draws annotations on images including bounding boxes, keypoints, links, and text.
@@ -328,7 +232,7 @@ def draw_annotations(image,
328 |       - draw_boxes (bool): Flag to draw bounding boxes.
329 |       - draw_text (bool): Flag to draw text annotations.
330 |       - draw_links (bool): Flag to draw links between annotations.
331 | -     - draw_twins (bool): Flag to draw
332 |       - write_class (bool): Flag to write class names near the annotations.
333 |       - write_score (bool): Flag to write scores near the annotations.
334 |       - write_text (bool): Flag to write OCR recognized text.
@@ -345,137 +249,119 @@ def draw_annotations(image,
345 |       image_copy = image.copy()
346 |       scale = max(image.shape[0], image.shape[1]) / 1000
347 |
348 | -     #
349 | -     def draw(data,is_prediction=False):
350 | -         """ Helper function to draw annotations based on provided data. """
351 | -
352 |           for i in range(len(data['boxes'])):
353 |               if is_prediction:
354 | -                 box = data['boxes'][i].tolist()
355 | -                 x1, y1, x2, y2 = box
356 | -                 if resize:
357 | -                     x1, y1, x2, y2 = resize_boxes(np.array([box]), new_size, (image_copy.shape[1],image_copy.shape[0]))[0]
358 |                   score = data['scores'][i].item()
359 |                   if score < score_threshold:
360 |                       continue
361 | -             else:
362 | -                 box = data['boxes'][i].tolist()
363 | -                 x1, y1, x2, y2 = box
364 |               if draw_boxes:
365 |                   if only_print is not None:
366 |                       if data['labels'][i] != list(model_dict.values()).index(only_print):
367 |                           continue
368 | -                 cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 0) if is_prediction else (0, 0, 0), int(2*scale))
369 |               if is_prediction and write_score:
370 | -                 cv2.putText(image_copy, str(round(score, 2)), (int(x1), int(y1) + int(15*scale)), cv2.FONT_HERSHEY_SIMPLEX, scale/2, (100,100, 255), 2)
371 |
372 |               if write_class and 'labels' in data:
373 |                   class_id = data['labels'][i].item()
374 | -                 cv2.putText(image_copy, model_dict[class_id], (int(x1), int(y1) - int(2*scale)), cv2.FONT_HERSHEY_SIMPLEX, scale/2, (255, 100, 100), 2)
375 |
376 |               if write_idx:
377 | -                 cv2.putText(image_copy, str(i), (int(x1) + int(15*scale), int(y1) + int(15*scale)), cv2.FONT_HERSHEY_SIMPLEX, 2*scale, (0,0, 0), 2)
378 | -
379 |
380 |           # Draw keypoints if available
381 |           if draw_keypoints and 'keypoints' in data:
382 |               if is_prediction and keypoints_correction:
383 |                   for idx, (key1, key2) in enumerate(data['keypoints']):
384 |                       if data['labels'][idx] not in [list(model_dict.values()).index('sequenceFlow'),
385 | -
386 | -
387 |                           continue
388 | -                     # Calculate the Euclidean distance between the two keypoints
389 |                       distance = np.linalg.norm(key1[:2] - key2[:2])
390 | -
391 |                       if distance < 5:
392 | -                         x_new,y_new, x,y = find_other_keypoint(idx, data['keypoints'], data['boxes'])
393 | -                         data['keypoints'][idx][0] = torch.tensor([x_new, y_new,1])
394 | -                         data['keypoints'][idx][1] = torch.tensor([x, y,1])
395 |                           print("keypoint has been changed")
396 |               for i in range(len(data['keypoints'])):
397 |                   kp = data['keypoints'][i]
398 |                   for j in range(kp.shape[0]):
399 | -                     if is_prediction and data['labels'][i]
400 |                           continue
401 |                       if is_prediction:
402 |                           score = data['scores'][i]
403 |                           if score < score_threshold:
404 |                               continue
405 | -                     x,y,v = np.array(kp[j])
406 |                       if resize:
407 | -                         x, y, v = resize_keypoints(np.array([kp[j]]), new_size, (image_copy.shape[1],image_copy.shape[0]))[0]
408 |                       if j == 0:
409 | -                         cv2.circle(image_copy, (int(x), int(y)), int(5*scale), (0, 0, 255), -1)
410 |                       else:
411 | -                         cv2.circle(image_copy, (int(x), int(y)), int(5*scale), (255, 0, 0), -1)
412 |
413 |       # Draw text predictions if available
414 | -     if (draw_text or write_text) and text_predictions is not None:
415 |           for i in range(len(text_predictions[0])):
416 |               x1, y1, x2, y2 = text_predictions[0][i]
417 |               text = text_predictions[1][i]
418 |               if resize:
419 | -                 x1, y1, x2, y2 = resize_boxes(np.array([[float(x1), float(y1), float(x2), float(y2)]]), new_size, (image_copy.shape[1],image_copy.shape[0]))[0]
420 |               if draw_text:
421 | -                 cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), int(2*scale))
422 |               if write_text:
423 | -                 cv2.putText(image_copy, text, (int(x1 + int(2*scale)), int((y1+y2)/2)
424 | -
425 |       def draw_with_links(full_prediction):
426 | -
427 | -         #check if keypoints detected are the same
428 |           if draw_twins and full_prediction is not None:
429 | -
430 | -
431 | -             circle_radius = int(10 * scale)  # Circle radius scaled by image scale
432 | -
433 |               for idx, (key1, key2) in enumerate(full_prediction['keypoints']):
434 |                   if full_prediction['labels'][idx] not in [list(model_dict.values()).index('sequenceFlow'),
435 | -
436 | -
437 |                       continue
438 | -                 # Calculate the Euclidean distance between the two keypoints
439 |                   distance = np.linalg.norm(key1[:2] - key2[:2])
440 |                   if distance < 10:
441 | -                     x_new,y_new, x,y = find_other_keypoint(idx,full_prediction)
442 |                       cv2.circle(image_copy, (int(x), int(y)), circle_radius, circle_color, -1)
443 | -                     cv2.circle(image_copy, (int(x_new), int(y_new)), circle_radius, (0,0,0), -1)
444 |
445 | -
446 | -         if draw_links==True and full_prediction is not None:
447 |               for i, (start_idx, end_idx) in enumerate(full_prediction['links']):
448 |                   if start_idx is None or end_idx is None:
449 |                       continue
450 |                   start_box = full_prediction['boxes'][start_idx]
451 |                   end_box = full_prediction['boxes'][end_idx]
452 |                   current_box = full_prediction['boxes'][i]
453 | -                 # Calculate the center of each bounding box
454 |                   start_center = ((start_box[0] + start_box[2]) // 2, (start_box[1] + start_box[3]) // 2)
455 |                   end_center = ((end_box[0] + end_box[2]) // 2, (end_box[1] + end_box[3]) // 2)
456 |                   current_center = ((current_box[0] + current_box[2]) // 2, (current_box[1] + current_box[3]) // 2)
457 | -
458 | -                 cv2.line(image_copy, (int(
459 | -                 cv2.line(image_copy, (int(current_center[0]), int(current_center[1])), (int(end_center[0]), int(end_center[1])), (255, 0, 0), int(2*scale))
460 |
461 | -                 i+=1
462 |
463 | -     # Draw GT annotations
464 |       if target is not None:
465 |           draw(target, is_prediction=False)
466 | -     # Draw predictions
467 |       if prediction is not None:
468 | -         #prediction = prediction[0]
469 |           draw(prediction, is_prediction=True)
470 | -     # Draw links with full predictions
471 |       if full_prediction is not None:
472 |           draw_with_links(full_prediction)
473 |
474 | -     # Display the image
475 |       image_copy = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB)
476 |       plt.figure(figsize=(12, 12))
477 |       plt.imshow(image_copy)
478 | -     if axis
479 |           plt.axis('off')
480 |       plt.show()
481 |
@@ -496,28 +382,24 @@ def find_closest_object(keypoint, boxes, labels):
496 |       closest_object_idx = None
497 |       best_point = None
498 |       min_distance = float('inf')
499 | -     # Iterate over each bounding box
500 |       for i, box in enumerate(boxes):
501 |           if labels[i] in [list(class_dict.values()).index('sequenceFlow'),
502 |                            list(class_dict.values()).index('messageFlow'),
503 |                            list(class_dict.values()).index('dataAssociation'),
504 | -                          #list(class_dict.values()).index('pool'),
505 |                            list(class_dict.values()).index('lane')]:
506 |               continue
507 |           x1, y1, x2, y2 = box
508 |
509 | -         top = ((x1+x2)/2, y1)
510 | -         bottom = ((x1+x2)/2, y2)
511 | -         left = (x1, (y1+y2)/2)
512 | -         right = (x2, (y1+y2)/2)
513 | -         points = [left, top
514 |
515 | -         pos_dict = {0:'left', 1:'top', 2:'right', 3:'bottom'}
516 |
517 | -
518 | -         for pos, (point) in enumerate(points):
519 |               distance = np.linalg.norm(keypoint[:2] - point)
520 | -             # Update the closest object index if this object is closer
521 |               if distance < min_distance:
522 |                   min_distance = distance
523 |                   closest_object_idx = i
@@ -525,9 +407,10 @@ def find_closest_object(keypoint, boxes, labels):
525 |
526 |       return closest_object_idx, best_point
527 |
528 | -
529 |   def error(text='There is an error in the detection'):
530 |       st.error(text, icon="🚨")
531 |
532 |   def warning(text='Some element are maybe not detected, verify the results, try to modify the parameters or try to add it in the method and style step.'):
533 |       st.warning(text, icon="⚠️")
1 |   import torch
2 |   import torchvision.transforms.functional as F
3 |   import numpy as np
4 |   import cv2
5 |   import matplotlib.pyplot as plt
6 |   import streamlit as st
7 |
8 | + # Define dictionaries to map class indices to their corresponding names
9 |   object_dict = {
10 |       0: 'background',
11 |       1: 'task',
48 |       15: 'messageFlow',
49 |   }
50 |
51 |   def is_inside(box1, box2):
52 |       """Check if the center of box1 is inside box2."""
53 |       x_center = (box1[0] + box1[2]) / 2
58 |       """Determine if the text in the bounding box is vertically aligned."""
59 |       width = box[2] - box[0]
60 |       height = box[3] - box[1]
61 | +     return (height > 2 * width)
62 |
63 |   def rescale_boxes(scale, boxes):
64 | +     """Rescale the bounding boxes by a given scale factor."""
65 |       for i in range(len(boxes)):
66 | +         boxes[i] = [boxes[i][0] * scale, boxes[i][1] * scale, boxes[i][2] * scale, boxes[i][3] * scale]
67 |       return boxes
68 |
69 |   def iou(box1, box2):
70 | +     """Calculate the Intersection over Union (IoU) of two bounding boxes."""
71 |       inter_box = [max(box1[0], box2[0]), max(box1[1], box2[1]), min(box1[2], box2[2]), min(box1[3], box2[3])]
72 |       inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
73 |       box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
74 |       box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
75 |       union_area = box1_area + box2_area - inter_area
76 |       return inter_area / union_area
77 |
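To make the helper above concrete, a small usage sketch for iou (an illustration only, not part of the commit; boxes are assumed to be [x1, y1, x2, y2] pixel coordinates and the values are made up):

box_a = [0, 0, 100, 100]
box_b = [50, 50, 150, 150]
print(iou(box_a, box_b))  # intersection 50*50 = 2500, union 17500, so IoU is roughly 0.14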
78 |   def proportion_inside(box1, box2):
79 | +     """Calculate the proportion of the smaller box inside the larger box."""
80 |       box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
81 |       box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
82 | +     big_box, small_box = (box1, box2) if box1_area > box2_area else (box2, box1)
83 |       inter_box = [max(small_box[0], big_box[0]), max(small_box[1], big_box[1]), min(small_box[2], big_box[2]), min(small_box[3], big_box[3])]
84 |       inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
85 |       proportion = inter_area / ((small_box[2] - small_box[0]) * (small_box[3] - small_box[1]))
86 |       return min(proportion, 1.0)
87 |
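A short usage sketch for proportion_inside (illustrative values, not from the repository; here half of the small box overlaps the large one):

small = [0, 0, 10, 10]
large = [5, -100, 200, 100]
print(proportion_inside(small, large))  # 0.5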
88 |   def resize_boxes(boxes, original_size, target_size):
99 |       """
100 |       orig_width, orig_height = original_size
101 |       target_width, target_height = target_size
102 |       width_ratio = target_width / orig_width
103 |       height_ratio = target_height / orig_height
104 |       boxes[:, 0] *= width_ratio
105 |       boxes[:, 1] *= height_ratio
106 |       boxes[:, 2] *= width_ratio
107 |       boxes[:, 3] *= height_ratio
108 |       return boxes
109 |
110 | + def resize_keypoints(keypoints, original_size, target_size):
111 |       """
112 |       Resize keypoints based on the original and target dimensions of an image.
113 |
118 |
119 |       Returns:
120 |       - np.ndarray: The resized keypoints.
121 |       """
122 |       orig_width, orig_height = original_size
123 |       target_width, target_height = target_size
124 |       width_ratio = target_width / orig_width
125 |       height_ratio = target_height / orig_height
126 | +     keypoints[:, 0] *= width_ratio
127 | +     keypoints[:, 1] *= height_ratio
128 |       return keypoints
129 |
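A minimal sketch of how the two resize helpers are meant to be used together (assuming NumPy float arrays; both functions scale the coordinates in place and return the same array):

import numpy as np

boxes = np.array([[10.0, 20.0, 110.0, 220.0]])
keypoints = np.array([[10.0, 20.0, 1.0]])
boxes = resize_boxes(boxes, (1000, 800), (500, 400))              # -> [[5., 10., 55., 110.]]
keypoints = resize_keypoints(keypoints, (1000, 800), (500, 400))  # -> [[5., 10., 1.]]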
130 | + def write_results(name_model, metrics_list, start_epoch):
131 | +     """Write training results to a text file."""
132 | +     with open('./results/' + name_model + '.txt', 'w') as f:
133 |           for i in range(len(metrics_list[0])):
134 | +             f.write(f"{i + 1 + start_epoch},{metrics_list[0][i]},{metrics_list[1][i]},{metrics_list[2][i]},{metrics_list[3][i]},{metrics_list[4][i]},{metrics_list[5][i]},{metrics_list[6][i]},{metrics_list[7][i]},{metrics_list[8][i]},{metrics_list[9][i]} \n")
135 |
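For reference, a hedged sketch of the file this produces (assuming metrics_list holds ten equal-length lists and that the ./results/ directory already exists):

metrics_list = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
write_results('demo_model', metrics_list, start_epoch=0)
# ./results/demo_model.txt then contains one comma-separated row per epoch, e.g.
# 1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0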
136 |   def find_other_keypoint(idx, keypoints, boxes):
137 | +     """
138 | +     Find the opposite keypoint to the center of the box.
139 | +
140 | +     Parameters:
141 | +     - idx (int): The index of the box and keypoints.
142 | +     - keypoints (np.ndarray): The array of keypoints.
143 | +     - boxes (np.ndarray): The array of bounding boxes.
144 | +
145 | +     Returns:
146 | +     - tuple: The coordinates of the new keypoint and the average keypoint.
147 | +     """
148 |       box = boxes[idx]
149 | +     key1, key2 = keypoints[idx]
150 |       x1, y1, x2, y2 = box
151 |       center = ((x1 + x2) // 2, (y1 + y2) // 2)
152 |       average_keypoint = (key1 + key2) // 2
153 |       if average_keypoint[0] < center[0]:
154 |           x = center[0] + abs(center[0] - average_keypoint[0])
155 |       else:
159 |       else:
160 |           y = center[1] - abs(center[1] - average_keypoint[1])
161 |       return x, y, average_keypoint[0], average_keypoint[1]
162 |
163 |   def filter_overlap_boxes(boxes, scores, labels, keypoints, iou_threshold=0.5):
164 |       """
174 |       Returns:
175 |       - tuple: Filtered boxes, scores, labels, and keypoints.
176 |       """
177 |       areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
178 |       order = scores.argsort()[::-1]
179 | +     keep = []
180 |       while order.size > 0:
181 |           i = order[0]
182 | +         keep.append(i)
183 |           xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
184 |           yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
185 |           xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
186 |           yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
187 |           w = np.maximum(0.0, xx2 - xx1)
188 |           h = np.maximum(0.0, yy2 - yy1)
189 |           inter = w * h
190 |           iou = inter / (areas[i] + areas[order[1:]] - inter)
191 |           inds = np.where(iou <= iou_threshold)[0]
192 | +         order = order[inds + 1]
193 |       boxes = boxes[keep]
194 |       scores = scores[keep]
195 |       labels = labels[keep]
196 |       keypoints = keypoints[keep]
197 |       return boxes, scores, labels, keypoints
198 |
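A usage sketch for filter_overlap_boxes, which is a standard non-maximum-suppression pass (the arrays below are invented; keypoints are passed as one (2, 3) array per box):

import numpy as np

boxes = np.array([[0, 0, 100, 100], [10, 10, 110, 110], [200, 200, 300, 300]], dtype=float)
scores = np.array([0.9, 0.8, 0.95])
labels = np.array([1, 1, 2])
keypoints = np.zeros((3, 2, 3))
boxes, scores, labels, keypoints = filter_overlap_boxes(boxes, scores, labels, keypoints, iou_threshold=0.5)
# the second box overlaps the first with IoU of about 0.68 and has the lower score, so only two boxes remain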
199 |   def draw_annotations(image,
200 |                        target=None,
201 |                        prediction=None,
216 |                        only_print=None,
217 |                        axis=False,
218 |                        return_image=False,
219 | +                      new_size=(1333, 800),
220 |                        resize=False):
221 |       """
222 |       Draws annotations on images including bounding boxes, keypoints, links, and text.
232 |       - draw_boxes (bool): Flag to draw bounding boxes.
233 |       - draw_text (bool): Flag to draw text annotations.
234 |       - draw_links (bool): Flag to draw links between annotations.
235 | +     - draw_twins (bool): Flag to draw twin keypoints.
236 |       - write_class (bool): Flag to write class names near the annotations.
237 |       - write_score (bool): Flag to write scores near the annotations.
238 |       - write_text (bool): Flag to write OCR recognized text.
249 |       image_copy = image.copy()
250 |       scale = max(image.shape[0], image.shape[1]) / 1000
251 |
252 | +     # Helper function to draw annotations based on provided data
253 | +     def draw(data, is_prediction=False):
254 |           for i in range(len(data['boxes'])):
255 | +             box = data['boxes'][i].tolist()
256 | +             x1, y1, x2, y2 = box
257 | +             if resize:
258 | +                 x1, y1, x2, y2 = resize_boxes(np.array([box]), new_size, (image_copy.shape[1], image_copy.shape[0]))[0]
259 |               if is_prediction:
260 |                   score = data['scores'][i].item()
261 |                   if score < score_threshold:
262 |                       continue
263 |               if draw_boxes:
264 |                   if only_print is not None:
265 |                       if data['labels'][i] != list(model_dict.values()).index(only_print):
266 |                           continue
267 | +                 cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 0) if is_prediction else (0, 0, 0), int(2 * scale))
268 |               if is_prediction and write_score:
269 | +                 cv2.putText(image_copy, str(round(score, 2)), (int(x1), int(y1) + int(15 * scale)), cv2.FONT_HERSHEY_SIMPLEX, scale / 2, (100, 100, 255), 2)
270 |
271 |               if write_class and 'labels' in data:
272 |                   class_id = data['labels'][i].item()
273 | +                 cv2.putText(image_copy, model_dict[class_id], (int(x1), int(y1) - int(2 * scale)), cv2.FONT_HERSHEY_SIMPLEX, scale / 2, (255, 100, 100), 2)
274 |
275 |               if write_idx:
276 | +                 cv2.putText(image_copy, str(i), (int(x1) + int(15 * scale), int(y1) + int(15 * scale)), cv2.FONT_HERSHEY_SIMPLEX, 2 * scale, (0, 0, 0), 2)
277 |
278 |           # Draw keypoints if available
279 |           if draw_keypoints and 'keypoints' in data:
280 |               if is_prediction and keypoints_correction:
281 |                   for idx, (key1, key2) in enumerate(data['keypoints']):
282 |                       if data['labels'][idx] not in [list(model_dict.values()).index('sequenceFlow'),
283 | +                                                    list(model_dict.values()).index('messageFlow'),
284 | +                                                    list(model_dict.values()).index('dataAssociation')]:
285 |                           continue
286 |                       distance = np.linalg.norm(key1[:2] - key2[:2])
287 |                       if distance < 5:
288 | +                         x_new, y_new, x, y = find_other_keypoint(idx, data['keypoints'], data['boxes'])
289 | +                         data['keypoints'][idx][0] = torch.tensor([x_new, y_new, 1])
290 | +                         data['keypoints'][idx][1] = torch.tensor([x, y, 1])
291 |                           print("keypoint has been changed")
292 |               for i in range(len(data['keypoints'])):
293 |                   kp = data['keypoints'][i]
294 |                   for j in range(kp.shape[0]):
295 | +                     if is_prediction and data['labels'][i] not in [list(model_dict.values()).index('sequenceFlow'),
296 | +                                                                    list(model_dict.values()).index('messageFlow'),
297 | +                                                                    list(model_dict.values()).index('dataAssociation')]:
298 |                           continue
299 |                       if is_prediction:
300 |                           score = data['scores'][i]
301 |                           if score < score_threshold:
302 |                               continue
303 | +                     x, y, v = np.array(kp[j])
304 |                       if resize:
305 | +                         x, y, v = resize_keypoints(np.array([kp[j]]), new_size, (image_copy.shape[1], image_copy.shape[0]))[0]
306 |                       if j == 0:
307 | +                         cv2.circle(image_copy, (int(x), int(y)), int(5 * scale), (0, 0, 255), -1)
308 |                       else:
309 | +                         cv2.circle(image_copy, (int(x), int(y)), int(5 * scale), (255, 0, 0), -1)
310 |
311 |       # Draw text predictions if available
312 | +     if (draw_text or write_text) and text_predictions is not None:
313 |           for i in range(len(text_predictions[0])):
314 |               x1, y1, x2, y2 = text_predictions[0][i]
315 |               text = text_predictions[1][i]
316 |               if resize:
317 | +                 x1, y1, x2, y2 = resize_boxes(np.array([[float(x1), float(y1), float(x2), float(y2)]]), new_size, (image_copy.shape[1], image_copy.shape[0]))[0]
318 |               if draw_text:
319 | +                 cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), int(2 * scale))
320 |               if write_text:
321 | +                 cv2.putText(image_copy, text, (int(x1 + int(2 * scale)), int((y1 + y2) / 2)), cv2.FONT_HERSHEY_SIMPLEX, scale / 2, (0, 0, 0), 2)
322 | +
323 |       def draw_with_links(full_prediction):
324 | +         """Draws links between objects based on the full prediction data."""
325 |           if draw_twins and full_prediction is not None:
326 | +             circle_color = (0, 255, 0)
327 | +             circle_radius = int(10 * scale)
328 |               for idx, (key1, key2) in enumerate(full_prediction['keypoints']):
329 |                   if full_prediction['labels'][idx] not in [list(model_dict.values()).index('sequenceFlow'),
330 | +                                                           list(model_dict.values()).index('messageFlow'),
331 | +                                                           list(model_dict.values()).index('dataAssociation')]:
332 |                       continue
333 |                   distance = np.linalg.norm(key1[:2] - key2[:2])
334 |                   if distance < 10:
335 | +                     x_new, y_new, x, y = find_other_keypoint(idx, full_prediction['keypoints'], full_prediction['boxes'])
336 |                       cv2.circle(image_copy, (int(x), int(y)), circle_radius, circle_color, -1)
337 | +                     cv2.circle(image_copy, (int(x_new), int(y_new)), circle_radius, (0, 0, 0), -1)
338 |
339 | +         if draw_links and full_prediction is not None:
340 |               for i, (start_idx, end_idx) in enumerate(full_prediction['links']):
341 |                   if start_idx is None or end_idx is None:
342 |                       continue
343 |                   start_box = full_prediction['boxes'][start_idx]
344 |                   end_box = full_prediction['boxes'][end_idx]
345 |                   current_box = full_prediction['boxes'][i]
346 |                   start_center = ((start_box[0] + start_box[2]) // 2, (start_box[1] + start_box[3]) // 2)
347 |                   end_center = ((end_box[0] + end_box[2]) // 2, (end_box[1] + end_box[3]) // 2)
348 |                   current_center = ((current_box[0] + current_box[2]) // 2, (current_box[1] + current_box[3]) // 2)
349 | +                 cv2.line(image_copy, (int(start_center[0]), int(start_center[1])), (int(current_center[0]), int(current_center[1])), (0, 0, 255), int(2 * scale))
350 | +                 cv2.line(image_copy, (int(current_center[0]), int(current_center[1])), (int(end_center[0]), int(end_center[1])), (255, 0, 0), int(2 * scale))
351 |
352 | +                 i += 1
353 |
354 |       if target is not None:
355 |           draw(target, is_prediction=False)
356 |       if prediction is not None:
357 |           draw(prediction, is_prediction=True)
358 |       if full_prediction is not None:
359 |           draw_with_links(full_prediction)
360 |
361 |       image_copy = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB)
362 |       plt.figure(figsize=(12, 12))
363 |       plt.imshow(image_copy)
364 | +     if not axis:
365 |           plt.axis('off')
366 |       plt.show()
367 |
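To make the expected inputs concrete, a hedged sketch of the dictionary shape that draw_annotations reads for its prediction and target arguments (field names are taken from the code above; the tensor values and shapes are assumptions for illustration):

import torch

prediction = {
    'boxes': torch.tensor([[10.0, 10.0, 120.0, 80.0]]),    # one [x1, y1, x2, y2] box per object
    'labels': torch.tensor([1]),                            # indices into the class dictionaries
    'scores': torch.tensor([0.92]),                         # detection confidence per box
    'keypoints': torch.tensor([[[10.0, 45.0, 1.0],          # two (x, y, visibility) points per object
                                [120.0, 45.0, 1.0]]]),
}
# a full_prediction additionally carries a 'links' list of (start_idx, end_idx) pairs per arrow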
382 |       closest_object_idx = None
383 |       best_point = None
384 |       min_distance = float('inf')
385 |       for i, box in enumerate(boxes):
386 |           if labels[i] in [list(class_dict.values()).index('sequenceFlow'),
387 |                            list(class_dict.values()).index('messageFlow'),
388 |                            list(class_dict.values()).index('dataAssociation'),
389 |                            list(class_dict.values()).index('lane')]:
390 |               continue
391 |           x1, y1, x2, y2 = box
392 |
393 | +         top = ((x1 + x2) / 2, y1)
394 | +         bottom = ((x1 + x2) / 2, y2)
395 | +         left = (x1, (y1 + y2) / 2)
396 | +         right = (x2, (y1 + y2) / 2)
397 | +         points = [left, top, right, bottom]
398 |
399 | +         pos_dict = {0: 'left', 1: 'top', 2: 'right', 3: 'bottom'}
400 |
401 | +         for pos, point in enumerate(points):
402 |               distance = np.linalg.norm(keypoint[:2] - point)
403 |               if distance < min_distance:
404 |                   min_distance = distance
405 |                   closest_object_idx = i
407 |
408 |       return closest_object_idx, best_point
409 |
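A usage sketch for find_closest_object (made-up inputs; labels index into class_dict and the keypoint is a NumPy (x, y) point):

import numpy as np

boxes = np.array([[0, 0, 100, 100], [300, 0, 400, 100]], dtype=float)
labels = np.array([1, 1])   # e.g. two 'task' boxes
keypoint = np.array([105.0, 50.0])
idx, point = find_closest_object(keypoint, boxes, labels)
# idx == 0: the keypoint lies closest to the right side of the first box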
410 |   def error(text='There is an error in the detection'):
411 | +     """Display an error message using Streamlit."""
412 |       st.error(text, icon="🚨")
413 |
414 |   def warning(text='Some element are maybe not detected, verify the results, try to modify the parameters or try to add it in the method and style step.'):
415 | +     """Display a warning message using Streamlit."""
416 |       st.warning(text, icon="⚠️")
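Both helpers are thin wrappers around Streamlit's message widgets; a minimal sketch of how they are meant to be called from inside a running Streamlit page (the message text here is invented):

error("No BPMN elements were detected in the uploaded image.")
warning()   # falls back to the default advisory text above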