Spaces:

chaouch
/

YOLO_Object_Detection

Sleeping

App Files Files Community

chaouch committed on Jun 20, 2024

Commit

23eb270

1 Parent(s): db56307

done

Browse files

Files changed (5) hide show

app.py +57 -60
coco_classes.txt +80 -0
packages.txt +2 -0
requirements.txt +6 -1
yolo.py +318 -0

app.py CHANGED Viewed

@@ -1,63 +1,60 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import numpy as np
+import tensorflow as tf
+import cv2
+import os
+import requests
+from yolo import Yolo
+# URL the YOLO weights are fetched from when they are not cached locally.
+model_link = "https://intranet-projects-files.s3.amazonaws.com/holbertonschool-ml/yolo.h5"
+def download_model():
+    """Download the model weights to ``data/yolo.h5`` unless already present.
+    The ``data`` directory is created when missing. The response is streamed
+    to a temporary ``.part`` file and renamed only after the transfer has
+    finished, so a failed or interrupted download never leaves a file that
+    a later start would mistake for valid weights.
+    Raises:
+        requests.HTTPError: if the server answers with an error status.
+        requests.RequestException: on connection problems or timeouts.
+    """
+    os.makedirs('data', exist_ok=True)
+    model_path = os.path.join('data', 'yolo.h5')
+    if os.path.exists(model_path):
+        print("Model already exists locally.")
+        return
+    print("Downloading model...")
+    partial_path = model_path + '.part'
+    # stream=True avoids holding the whole weight file in memory at once
+    with requests.get(model_link, stream=True, timeout=60) as r:
+        # fail loudly instead of saving an HTML/XML error body as the model
+        r.raise_for_status()
+        with open(partial_path, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=1 << 20):
+                f.write(chunk)
+    os.replace(partial_path, model_path)
+    print("Model downloaded")
+# Detector shared by every request; created on the first call to run().
+_yolo = None
+def _get_yolo():
+    """Return the shared Yolo detector, loading the weights on first use."""
+    global _yolo
+    if _yolo is None:
+        anchors = np.array([[[116, 90], [156, 198], [373, 326]],
+                            [[30, 61], [62, 45], [59, 119]],
+                            [[10, 13], [16, 30], [33, 23]]])
+        _yolo = Yolo('data/yolo.h5', 'coco_classes.txt', 0.6, 0.5, anchors)
+    return _yolo
+def run(img):
+    """Detect objects in an image and draw the detections on it.
+    Args:
+        img: image array handed over by the Gradio "image" input
+            (presumably RGB, height x width x 3 -- confirm), or None when
+            nothing was uploaded.
+    Returns:
+        The image with one rectangle and one "<class> <score>" label per
+        detection, or None when img is None.
+    """
+    if img is None:
+        return None
+    # Building a Yolo reloads the weights from disk, so reuse one instance
+    # instead of constructing a new detector for every request.
+    yolo = _get_yolo()
+    # Swap the first and last colour channels before inference (presumably
+    # so the model sees the channel order cv2.imread would give -- confirm).
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    boxes, box_classes, box_scores = yolo.predict_frame(img)
+    color = (255, 0, 0)
+    for box, class_idx, score in zip(boxes, box_classes, box_scores):
+        top_left = (int(box[0]), int(box[1]))
+        bottom_right = (int(box[2]), int(box[3]))
+        cv2.rectangle(img, top_left, bottom_right, color, 2)
+        text = f"{yolo.class_names[class_idx]} {score:.2f}"
+        # put the label just above the top-left corner of the box
+        cv2.putText(img, text, (top_left[0], top_left[1] - 5),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1,
+                    cv2.LINE_AA)
+    # Undo the channel swap before handing the image back to the caller
+    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+    return img
+# Image-in / image-out Gradio interface around run().
+demo = gr.Interface(fn=run, inputs="image", outputs="image")
 if __name__ == "__main__":
+    # Make sure the weights are on disk before the interface is launched.
+    download_model()
+    demo.launch()

coco_classes.txt ADDED Viewed

	@@ -0,0 +1,80 @@

+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ tesseract-ocr-all
2	+ ffmpeg

requirements.txt CHANGED Viewed

	@@ -1 +1,6 @@
1	- ~~huggingface_hub==0.22.2~~

+gradio
+tensorflow
+numpy
+ffmpeg
+requests
+opencv-python

yolo.py ADDED Viewed

	@@ -0,0 +1,318 @@

+#!/usr/bin/env python3
+"""this module contains the class Yolo"""
+import tensorflow.keras as K
+import numpy as np
+import os
+import cv2
+def sigmoid(x):
+    """Return the logistic function 1 / (1 + exp(-x)) of x, element-wise."""
+    decay = np.exp(-x)
+    return 1 / (1 + decay)
+class Yolo:
+    """Object detection with a YOLO v3 Keras model.
+    Attributes:
+        model: the Keras model loaded from model_path.
+        class_names: list of class names, one per line of classes_path.
+        class_t: minimum box score a detection needs to be kept.
+        nms_t: IoU threshold used by non-max suppression.
+        anchors: numpy.ndarray indexed [output, anchor, 0 or 1] holding the
+            anchor box widths (0) and heights (1).
+    """
+    def __init__(self, model_path, classes_path, class_t, nms_t, anchors):
+        """Load the model and the class names and store the thresholds.
+        Args:
+            model_path: path to the saved Keras model.
+            classes_path: path to a text file with one class name per line.
+            class_t: box score threshold used by filter_boxes.
+            nms_t: IoU threshold used by non_max_suppression.
+            anchors: numpy.ndarray of anchor box sizes, see class docstring.
+        Raises:
+            FileNotFoundError: if model_path or classes_path does not exist.
+        """
+        if not os.path.exists(model_path):
+            raise FileNotFoundError("Wrong model file path")
+        if not os.path.exists(classes_path):
+            raise FileNotFoundError("Wrong classes file path")
+        self.model = K.models.load_model(model_path)
+        self.class_names = self._read_class_names(classes_path)
+        self.class_t = class_t
+        self.nms_t = nms_t
+        self.anchors = anchors
+    @staticmethod
+    def _read_class_names(classes_path):
+        """Return the class names stored one per line in classes_path."""
+        with open(classes_path, 'r') as f:
+            # Strip only the newline: slicing off the last character would
+            # truncate the final name when the file has no trailing newline.
+            return [line.rstrip('\n') for line in f]
+    def _input_size(self):
+        """Return the model input size as (height, width).
+        Assumes the Keras channels-last layout
+        (batch, height, width, channels) -- confirm for other models.
+        """
+        shape = self.model.input.shape
+        return shape[1], shape[2]
+    def process_outputs(self, outputs, image_size):
+        """Convert raw model outputs into boxes in image pixel coordinates.
+        Args:
+            outputs: list of numpy.ndarrays, one per model output, each
+                indexed (grid_height, grid_width, anchor_boxes, 4 + 1 +
+                classes) for a single image.
+            image_size: the original image size as (height, width).
+        Returns:
+            Tuple (boxes, box_confidences, box_class_probs) of lists with
+            one array per output: boxes hold (x1, y1, x2, y2) in the last
+            axis, box_confidences the sigmoid of channel 4 and
+            box_class_probs the sigmoid of the remaining channels.
+        """
+        boxes = []
+        box_confidences = []
+        box_class_probs = []
+        img_h, img_w = image_size
+        input_h, input_w = self._input_size()
+        for i, output in enumerate(outputs):
+            grid_h, grid_w, nb_box, _ = output.shape
+            box_confidences.append(sigmoid(output[..., 4:5]))
+            box_class_probs.append(sigmoid(output[..., 5:]))
+            # t_x, t_y : raw centre offsets inside the grid cell
+            # t_w, t_h : raw width / height relative to the anchor
+            t_x = output[..., 0]
+            t_y = output[..., 1]
+            t_w = output[..., 2]
+            t_h = output[..., 3]
+            # c_x, c_y : column / row index of each grid cell, shaped to
+            # broadcast over (grid_h, grid_w, nb_box); unlike a
+            # tile-and-transpose this also works for non-square grids
+            c_x = np.arange(grid_w).reshape(1, grid_w, 1)
+            c_y = np.arange(grid_h).reshape(grid_h, 1, 1)
+            # p_w, p_h : anchor dimensions for this output
+            p_w = self.anchors[i, :, 0]
+            p_h = self.anchors[i, :, 1]
+            # yolo formula, normalised to [0, 1]: centres by the grid size,
+            # sizes by the model input size (width by width, height by
+            # height)
+            b_x = (sigmoid(t_x) + c_x) / grid_w
+            b_y = (sigmoid(t_y) + c_y) / grid_h
+            b_w = (np.exp(t_w) * p_w) / input_w
+            b_h = (np.exp(t_h) * p_h) / input_h
+            # scale to the image size (in pixels): top-left, bottom-right
+            box = np.zeros((grid_h, grid_w, nb_box, 4))
+            box[..., 0] = (b_x - b_w / 2) * img_w
+            box[..., 1] = (b_y - b_h / 2) * img_h
+            box[..., 2] = (b_x + b_w / 2) * img_w
+            box[..., 3] = (b_y + b_h / 2) * img_h
+            boxes.append(box)
+        return boxes, box_confidences, box_class_probs
+    def filter_boxes(self, boxes, box_confidences, box_class_probs):
+        """Keep the boxes whose best class score reaches class_t.
+        Args:
+            boxes: list of numpy.ndarrays indexed
+                (grid_height, grid_width, anchor_boxes, 4).
+            box_confidences: list of numpy.ndarrays indexed
+                (grid_height, grid_width, anchor_boxes, 1).
+            box_class_probs: list of numpy.ndarrays indexed
+                (grid_height, grid_width, anchor_boxes, classes).
+        Returns:
+            - filtered_boxes: (?, 4) coordinates of the kept boxes
+            - box_classes: (?,) class index of each kept box
+            - box_scores: (?,) confidence * best class probability
+        """
+        filtered_boxes = []
+        box_classes = []
+        box_scores = []
+        for box, conf, probs in zip(boxes, box_confidences, box_class_probs):
+            # best class per box and its probability
+            class_indices = np.argmax(probs, axis=-1)
+            class_prob = np.max(probs, axis=-1)
+            score = conf[..., 0] * class_prob
+            mask = score >= self.class_t
+            filtered_boxes.append(box[mask])
+            box_classes.append(class_indices[mask])
+            box_scores.append(score[mask])
+        if not filtered_boxes:
+            # no outputs at all: np.concatenate would raise on empty lists
+            return np.zeros((0, 4)), np.zeros((0,), dtype=int), np.zeros((0,))
+        return (np.concatenate(filtered_boxes),
+                np.concatenate(box_classes),
+                np.concatenate(box_scores))
+    def non_max_suppression(self, filtered_boxes, box_classes, box_scores):
+        """Suppress overlapping boxes of the same class.
+        Args:
+            filtered_boxes: (?, 4) array of (x1, y1, x2, y2) boxes.
+            box_classes: (?,) class index of each box.
+            box_scores: (?,) score of each box.
+        Returns:
+            Tuple (box_predictions, predicted_box_classes,
+            predicted_box_scores) grouped by ascending class index and, in
+            each class, ordered by descending score. When there are no
+            boxes the (empty) inputs are returned unchanged.
+        """
+        if len(box_scores) == 0:
+            # nothing passed the score filter; concatenating the empty
+            # per-class lists below would raise a ValueError
+            return filtered_boxes, box_classes, box_scores
+        nms_t = self.nms_t
+        box_predictions = []
+        predicted_box_classes = []
+        predicted_box_scores = []
+        for cls in np.unique(box_classes):
+            # boxes, classes and scores of the current class
+            idx = np.where(box_classes == cls)
+            boxes_of_cls = filtered_boxes[idx]
+            classes_of_cls = box_classes[idx]
+            scores_of_cls = box_scores[idx]
+            # indices sorted by score, highest first
+            order = scores_of_cls.argsort()[::-1]
+            keep = []
+            x1 = boxes_of_cls[:, 0]
+            y1 = boxes_of_cls[:, 1]
+            x2 = boxes_of_cls[:, 2]
+            y2 = boxes_of_cls[:, 3]
+            # the +1 treats coordinates as inclusive pixel indices
+            areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+            while order.shape[0] > 0:
+                i = order[0]
+                keep.append(i)
+                rest = order[1:]
+                # intersection of the best box with every remaining box
+                xx1 = np.maximum(x1[i], x1[rest])
+                yy1 = np.maximum(y1[i], y1[rest])
+                xx2 = np.minimum(x2[i], x2[rest])
+                yy2 = np.minimum(y2[i], y2[rest])
+                w = np.maximum(0.0, xx2 - xx1 + 1)
+                h = np.maximum(0.0, yy2 - yy1 + 1)
+                inter = w * h
+                overlap = inter / (areas[i] + areas[rest] - inter)
+                # keep only the boxes that do not overlap too much; inds is
+                # relative to rest, hence the +1 when indexing order
+                inds = np.where(overlap <= nms_t)[0]
+                order = order[inds + 1]
+            box_predictions.append(boxes_of_cls[keep])
+            predicted_box_classes.append(classes_of_cls[keep])
+            predicted_box_scores.append(scores_of_cls[keep])
+        box_predictions = np.concatenate(box_predictions)
+        predicted_box_classes = np.concatenate(predicted_box_classes)
+        predicted_box_scores = np.concatenate(predicted_box_scores)
+        return box_predictions, predicted_box_classes, predicted_box_scores
+    def _detect(self, outputs, image_size):
+        """Run process_outputs, filter_boxes and NMS for a single image."""
+        boxes, box_confidences, box_class_probs = self.process_outputs(
+            outputs, image_size)
+        filtered_boxes, box_classes, box_scores = self.filter_boxes(
+            boxes, box_confidences, box_class_probs)
+        return self.non_max_suppression(
+            filtered_boxes, box_classes, box_scores)
+    @staticmethod
+    def load_images(folder_path):
+        """Load every readable image of a folder with cv2.imread.
+        Args:
+            folder_path: folder holding the images.
+        Returns:
+            Tuple (images, paths) where paths[i] is the path images[i] was
+            read from, or None when folder_path does not exist. Entries
+            cv2.imread cannot decode are skipped.
+        """
+        if not os.path.exists(folder_path):
+            return None
+        images = []
+        paths = []
+        for name in os.listdir(folder_path):
+            path = os.path.join(folder_path, name)
+            img = cv2.imread(path)
+            if img is not None:
+                images.append(img)
+                # report the real location rather than a hard-coded folder
+                paths.append(path)
+        return (images, paths)
+    def preprocess_images(self, images):
+        """Resize the images to the model input size and scale to [0, 1].
+        Args:
+            images: list of images as numpy.ndarrays.
+        Returns:
+            Tuple (pimages, image_shapes): the preprocessed images stacked
+            into one array and the original (height, width) of each image.
+        """
+        input_h, input_w = self._input_size()
+        image_shapes = []
+        pimages = []
+        for image in images:
+            image_shapes.append(image.shape[:2])
+            # cv2.resize expects the target size as (width, height)
+            pimage = cv2.resize(image, (input_w, input_h),
+                                interpolation=cv2.INTER_CUBIC)
+            pimages.append(pimage / 255)
+        return np.array(pimages), np.array(image_shapes)
+    def show_boxes(self, image, boxes, box_classes, box_scores, file_name):
+        """Display an image with its detections drawn on a copy.
+        Blocks until a key is pressed; pressing 's' saves the annotated
+        image as ./detections/<file_name> before the window is closed.
+        Args:
+            image: the unprocessed image.
+            boxes: (?, 4) array of (x1, y1, x2, y2) boxes.
+            box_classes: (?,) class index of each box.
+            box_scores: (?,) score of each box.
+            file_name: window title and name of the saved file.
+        """
+        imagec = image.copy()
+        color = (255, 0, 0)
+        for box, class_idx, score in zip(boxes, box_classes, box_scores):
+            top_left = (int(box[0]), int(box[1]))
+            bottom_right = (int(box[2]), int(box[3]))
+            cv2.rectangle(imagec, top_left, bottom_right, color, 2)
+            text = self.class_names[class_idx] + " " + "{:.2f}".format(score)
+            # label sits 5 pixels above the top-left corner of the box
+            cv2.putText(imagec, text, (top_left[0], top_left[1] - 5),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1,
+                        cv2.LINE_AA)
+        cv2.imshow(file_name, imagec)
+        key = cv2.waitKey(0)
+        if key == ord('s'):
+            os.makedirs('./detections', exist_ok=True)
+            cv2.imwrite(os.path.join('./detections', file_name), imagec)
+        cv2.destroyAllWindows()
+    def predict(self, folder_path):
+        """Detect and display the objects of every image in a folder.
+        Args:
+            folder_path: folder holding the images to process.
+        Returns:
+            Tuple (predictions, image_paths) where predictions[i] is the
+            (boxes, box_classes, box_scores) tuple of image_paths[i].
+        Raises:
+            FileNotFoundError: if folder_path does not exist.
+        """
+        loaded = self.load_images(folder_path)
+        if loaded is None:
+            # load_images signals a missing folder with None; unpacking it
+            # directly would fail with an unhelpful TypeError
+            raise FileNotFoundError("Wrong folder path")
+        images, image_paths = loaded
+        if not images:
+            return [], image_paths
+        pimages, image_shapes = self.preprocess_images(images)
+        outputs = self.model.predict(pimages)
+        predictions = []
+        for i, image in enumerate(images):
+            # slice image i out of every batched model output
+            boxes, box_classes, box_scores = self._detect(
+                [out[i] for out in outputs], image_shapes[i])
+            self.show_boxes(image, boxes, box_classes, box_scores,
+                            os.path.basename(image_paths[i]))
+            predictions.append((boxes, box_classes, box_scores))
+        return predictions, image_paths
+    def predict_frame(self, frame):
+        """Run the detector on a single image without displaying anything.
+        Args:
+            frame: image as a numpy.ndarray indexed (height, width, ...).
+        Returns:
+            Tuple (boxes, box_classes, box_scores) in frame pixel
+            coordinates, after score filtering and non-max suppression.
+        """
+        input_h, input_w = self._input_size()
+        # cv2.resize expects the target size as (width, height)
+        image = cv2.resize(frame, (input_w, input_h),
+                           interpolation=cv2.INTER_CUBIC)
+        # scale to [0, 1] and add the batch axis the model expects
+        image = np.expand_dims(image / 255, axis=0)
+        outputs = self.model.predict(image)
+        return self._detect([out[0] for out in outputs], frame.shape[:2])