chaouch commited on
Commit
23eb270
·
1 Parent(s): db56307
Files changed (5) hide show
  1. app.py +57 -60
  2. coco_classes.txt +80 -0
  3. packages.txt +2 -0
  4. requirements.txt +6 -1
  5. yolo.py +318 -0
app.py CHANGED
@@ -1,63 +1,60 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message``, yielding the growing reply.

    Args:
        message: the new user message.
        history: earlier turns as ``(user_text, assistant_text)`` pairs;
            falsy entries of a pair are skipped.
        system_message: content of the leading ``system`` message.
        max_tokens: forwarded to ``client.chat_completion``.
        temperature: forwarded to ``client.chat_completion``.
        top_p: forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called ``message`` so the
    # parameter of that name is not overwritten while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # A chunk may carry no text (``None``); appending it would raise
        # TypeError, so only real text is accumulated.
        if token:
            response += token
        yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
# Extra controls shown next to the chat box; their values are handed to
# ``respond`` after ``message`` and ``history``, in this order.
_extra_inputs = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1,
              label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1,
              label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05,
              label="Top-p (nucleus sampling)"),
]

demo = gr.ChatInterface(respond, additional_inputs=_extra_inputs)


if __name__ == "__main__":
    demo.launch()
 
 
1
  import gradio as gr
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ import cv2
5
+ import os
6
+ import requests
7
+
8
+ from yolo import Yolo
9
+ model_link = "https://intranet-projects-files.s3.amazonaws.com/holbertonschool-ml/yolo.h5"
10
+
11
def download_model():
    """Download the YOLO weights to ``data/yolo.h5`` unless already present.

    The file is streamed to ``data/yolo.h5.part`` and renamed only after the
    whole body has been written, so an interrupted or failed download never
    leaves a truncated ``data/yolo.h5`` that a later run would mistake for a
    complete model.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeouts.
    """
    model_path = 'data/yolo.h5'
    partial_path = model_path + '.part'

    os.makedirs('data', exist_ok=True)

    if os.path.exists(model_path):
        print("Model already exists locally.")
        return

    print("Downloading model...")
    # stream=True keeps the weights out of memory; the timeout stops a
    # stalled connection from hanging start-up forever.
    with requests.get(model_link, stream=True, timeout=60) as r:
        # Without this check an HTML error page would be saved as the model.
        r.raise_for_status()
        with open(partial_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial_path, model_path)
    print("Model downloaded")
23
# Detector shared by every call to ``run``; built lazily by ``_get_detector``.
_DETECTOR = None


def _get_detector():
    """Return the shared Yolo detector, loading the model on first use."""
    global _DETECTOR
    if _DETECTOR is None:
        # One row of three (width, height) anchors per model output.
        anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                            [[30, 61], [62, 45], [59, 119]],
                            [[10, 13], [16, 30], [33, 23]]])
        _DETECTOR = Yolo('data/yolo.h5', 'coco_classes.txt', 0.6, 0.5,
                         anchors)
    return _DETECTOR


def run(img):
    """Detect objects in ``img`` and return the image with boxes drawn on it.

    Args:
        img: image array supplied by the Gradio "image" input, or ``None``
            when nothing was uploaded.

    Returns:
        A new array with a rectangle and a "<class> <score>" label per
        detection, or ``None`` when ``img`` is ``None``.
    """
    if img is None:
        # Nothing uploaded: there is nothing to convert or annotate.
        return None

    # Kept from the original: resets NumPy's global RNG on every call.
    np.random.seed(0)

    # Loading the Keras model is expensive, so it is done once and reused
    # instead of on every request.
    yolo = _get_detector()

    # Swaps the first and third channels.
    # NOTE(review): Gradio presumably delivers RGB, so this yields BGR, the
    # order Yolo.predict feeds the model via cv2.imread; confirm.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    boxes, box_classes, box_scores = yolo.predict_frame(img)

    color = (255, 0, 0)
    for idx, box in enumerate(boxes):
        top_left_x = int(box[0])
        top_left_y = int(box[1])
        bottom_right_x = int(box[2])
        bottom_right_y = int(box[3])
        class_name = yolo.class_names[box_classes[idx]]
        score = box_scores[idx]
        cv2.rectangle(img, (top_left_x, top_left_y),
                      (bottom_right_x, bottom_right_y),
                      color, 2)
        text = f"{class_name} {score:.2f}"
        cv2.putText(img, text, (top_left_x, top_left_y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1,
                    cv2.LINE_AA)

    # Swap the channels back so the result has the input's channel order.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    return img
55
+
56
# One image in, one annotated image out; ``run`` does the detection.
demo = gr.Interface(fn=run, inputs="image", outputs="image")


if __name__ == "__main__":
    # The weights must be on disk before the first request reaches ``run``.
    download_model()
    demo.launch()
coco_classes.txt ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ person
2
+ bicycle
3
+ car
4
+ motorbike
5
+ aeroplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ sofa
59
+ pottedplant
60
+ bed
61
+ diningtable
62
+ toilet
63
+ tvmonitor
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ tesseract-ocr-all
2
+ ffmpeg
requirements.txt CHANGED
@@ -1 +1,6 @@
1
- huggingface_hub==0.22.2
 
 
 
 
 
 
1
+ gradio
2
+ tensorflow
3
+ numpy
4
+ ffmpeg
5
+ requests
6
+ opencv-python
yolo.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """this module contains the class Yolo"""
3
+ import tensorflow.keras as K
4
+ import numpy as np
5
+ import os
6
+ import cv2
7
+
8
+
9
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Computed as ``exp(-log(1 + exp(-x)))`` through ``np.logaddexp`` so that
    large negative inputs give 0 without overflowing ``exp``.
    """
    return np.exp(-np.logaddexp(0, -x))
12
+
13
+
14
class Yolo:
    """YOLO v3 object detector built on a Keras Darknet model.

    Attributes set by the constructor:
        model: Keras model loaded from ``model_path``.
        class_names: class labels, one per line of ``classes_path``.
        class_t: minimum box score kept by ``filter_boxes``.
        nms_t: overlap threshold used by ``non_max_suppression``.
        anchors: array indexed as ``anchors[output, anchor, (width, height)]``.
    """

    def __init__(self, model_path, classes_path, class_t, nms_t, anchors):
        """Load the model and the class names and store the thresholds.

        Raises:
            FileNotFoundError: if ``model_path`` or ``classes_path`` does
                not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError("Wrong model file path")

        if not os.path.exists(classes_path):
            raise FileNotFoundError("Wrong classes file path")
        self.model = K.models.load_model(model_path)
        with open(classes_path, 'r') as f:
            # strip() rather than line[:-1]: the last line may have no
            # trailing newline, and slicing would then cut its last letter.
            self.class_names = [line.strip() for line in f]
        self.class_t = class_t
        self.nms_t = nms_t
        self.anchors = anchors

    def _input_size(self):
        """Return the model input size as ``(height, width)``."""
        # Keras image inputs are laid out (batch, height, width, channels).
        shape = self.model.input.shape
        return shape[1], shape[2]

    def process_outputs(self, outputs, image_size):
        """Turn raw model outputs into pixel-space boxes and probabilities.

        Args:
            outputs: list of arrays, one per model output, each shaped
                (grid_height, grid_width, anchor_boxes, 4 + 1 + classes).
            image_size: ``[image_height, image_width]`` of the original image.

        Returns:
            Tuple ``(boxes, box_confidences, box_class_probs)`` of lists with
            one array per output; boxes hold ``(x1, y1, x2, y2)`` in pixels
            of the original image.
        """
        boxes = []
        box_confidences = []
        box_class_probs = []
        img_h, img_w = image_size
        input_h, input_w = self._input_size()

        for i, output in enumerate(outputs):
            grid_h, grid_w, nb_box, _ = output.shape
            box_confidences.append(sigmoid(output[:, :, :, 4:5]))
            box_class_probs.append(sigmoid(output[:, :, :, 5:]))

            # Raw predictions: centre offsets and log-scale sizes.
            t_x = output[:, :, :, 0]
            t_y = output[:, :, :, 1]
            t_w = output[:, :, :, 2]
            t_h = output[:, :, :, 3]

            # Column / row index of every grid cell, shaped to broadcast
            # against (grid_h, grid_w, nb_box) for any grid aspect ratio.
            c_x = np.arange(grid_w).reshape(1, grid_w, 1)
            c_y = np.arange(grid_h).reshape(grid_h, 1, 1)

            # Anchor widths and heights for this output.
            p_w = self.anchors[i, :, 0]
            p_h = self.anchors[i, :, 1]

            # YOLO box equations, normalised to the [0, 1] range: centres by
            # the grid size, sizes by the model input size.
            b_x = (sigmoid(t_x) + c_x) / grid_w
            b_y = (sigmoid(t_y) + c_y) / grid_h
            b_w = (np.exp(t_w) * p_w) / input_w
            b_h = (np.exp(t_h) * p_h) / input_h

            # Corner coordinates scaled to the original image, in pixels.
            box = np.zeros((grid_h, grid_w, nb_box, 4))
            box[:, :, :, 0] = (b_x - b_w / 2) * img_w
            box[:, :, :, 1] = (b_y - b_h / 2) * img_h
            box[:, :, :, 2] = (b_x + b_w / 2) * img_w
            box[:, :, :, 3] = (b_y + b_h / 2) * img_h
            boxes.append(box)
        return boxes, box_confidences, box_class_probs

    def filter_boxes(self, boxes, box_confidences, box_class_probs):
        """Keep the boxes whose best class score reaches ``class_t``.

        Args:
            boxes: list of arrays shaped
                (grid_height, grid_width, anchor_boxes, 4).
            box_confidences: list of arrays shaped
                (grid_height, grid_width, anchor_boxes, 1).
            box_class_probs: list of arrays shaped
                (grid_height, grid_width, anchor_boxes, classes).

        Returns:
            Tuple ``(filtered_boxes, box_classes, box_scores)`` shaped
            (?, 4), (?,) and (?,), where ? is the number of kept boxes.
        """
        filtered_boxes = []
        box_classes = []
        box_scores = []

        for box, conf, class_probs in zip(boxes, box_confidences,
                                          box_class_probs):
            # Best class per box and its probability.
            class_indices = np.argmax(class_probs, axis=-1)
            class_prob = np.max(class_probs, axis=-1)
            # Box score = objectness * best class probability.
            score = conf[..., 0] * class_prob

            mask = score >= self.class_t
            filtered_boxes.append(box[mask])
            box_classes.append(class_indices[mask])
            box_scores.append(score[mask])

        return (np.concatenate(filtered_boxes),
                np.concatenate(box_classes),
                np.concatenate(box_scores))

    def non_max_suppression(self, filtered_boxes, box_classes, box_scores):
        """Drop boxes that overlap a higher-scoring box of the same class.

        Args:
            filtered_boxes: (?, 4) array of ``(x1, y1, x2, y2)`` boxes.
            box_classes: (?,) array with the class index of each box.
            box_scores: (?,) array with the score of each box.

        Returns:
            Tuple ``(box_predictions, predicted_box_classes,
            predicted_box_scores)`` ordered by class, then by descending
            score. All three are empty when there are no input boxes.
        """
        nms_t = self.nms_t
        box_predictions = []
        predicted_box_classes = []
        predicted_box_scores = []
        unique_classes = np.unique(box_classes)

        if unique_classes.size == 0:
            # No detections: np.concatenate([]) would raise, so return
            # correctly shaped empty arrays instead.
            return (np.reshape(np.asarray(filtered_boxes), (0, 4)),
                    np.reshape(np.asarray(box_classes), (0,)),
                    np.reshape(np.asarray(box_scores), (0,)))

        for cls in unique_classes:
            idx = np.where(box_classes == cls)
            boxes_of_cls = filtered_boxes[idx]
            classes_of_cls = box_classes[idx]
            scores_of_cls = box_scores[idx]

            # Candidate indices, best score first.
            order = scores_of_cls.argsort()[::-1]
            keep = []

            x1 = boxes_of_cls[:, 0]
            y1 = boxes_of_cls[:, 1]
            x2 = boxes_of_cls[:, 2]
            y2 = boxes_of_cls[:, 3]

            # The +1 terms count both end coordinates of a side
            # (a side from x1 to x2 is x2 - x1 + 1 long).
            areas = (x2 - x1 + 1) * (y2 - y1 + 1)

            while order.shape[0] > 0:
                i = order[0]
                keep.append(i)
                rest = order[1:]

                # Intersection of the kept box with every remaining box.
                xx1 = np.maximum(x1[i], x1[rest])
                yy1 = np.maximum(y1[i], y1[rest])
                xx2 = np.minimum(x2[i], x2[rest])
                yy2 = np.minimum(y2[i], y2[rest])

                w = np.maximum(0.0, xx2 - xx1 + 1)
                h = np.maximum(0.0, yy2 - yy1 + 1)

                inter = w * h
                union = areas[i] + areas[rest] - inter
                overlap = inter / union

                # Survivors are the boxes overlapping at most nms_t; the +1
                # maps positions in ``rest`` back to positions in ``order``.
                inds = np.where(overlap <= nms_t)[0]
                order = order[inds + 1]

            box_predictions.append(boxes_of_cls[keep])
            predicted_box_classes.append(classes_of_cls[keep])
            predicted_box_scores.append(scores_of_cls[keep])

        box_predictions = np.concatenate(box_predictions)
        predicted_box_classes = np.concatenate(predicted_box_classes)
        predicted_box_scores = np.concatenate(predicted_box_scores)

        return box_predictions, predicted_box_classes, predicted_box_scores

    @staticmethod
    def load_images(folder_path):
        """Read every image OpenCV can decode from ``folder_path``.

        Returns:
            ``None`` if the folder does not exist, otherwise a tuple
            ``(images, paths)`` with the decoded images and the path each
            one was read from. Entries that cannot be decoded are skipped.
        """
        if not os.path.exists(folder_path):
            return None
        images = []
        paths = []
        for name in os.listdir(folder_path):
            path = os.path.join(folder_path, name)
            img = cv2.imread(path)
            if img is not None:
                images.append(img)
                # Report the real location rather than a fixed './yolo'.
                paths.append(path)
        return (images, paths)

    def preprocess_images(self, images):
        """Resize images to the model input size and scale them to [0, 1].

        Returns:
            Tuple ``(pimages, image_shapes)``: the stacked preprocessed
            images and an array with each original ``(height, width)``.
        """
        input_h, input_w = self._input_size()
        image_shapes = []
        pimages = []
        for image in images:
            image_shapes.append(image.shape[:2])
            # cv2.resize expects the target size as (width, height).
            pimage = cv2.resize(image, (input_w, input_h),
                                interpolation=cv2.INTER_CUBIC)
            pimages.append(pimage / 255)
        return np.array(pimages), np.array(image_shapes)

    def show_boxes(self, image, boxes, box_classes, box_scores, file_name):
        """Display ``image`` with its boxes; pressing 's' saves the result.

        The annotated copy is shown in a window titled ``file_name``. If the
        key pressed is 's' it is written to ``./detections/<file_name>``.
        The input image itself is not modified.
        """
        imagec = image.copy()
        color = (255, 0, 0)
        for idx, box in enumerate(boxes):
            top_left_x = int(box[0])
            top_left_y = int(box[1])
            bottom_right_x = int(box[2])
            bottom_right_y = int(box[3])
            class_name = self.class_names[box_classes[idx]]
            score = box_scores[idx]
            cv2.rectangle(imagec, (top_left_x, top_left_y),
                          (bottom_right_x, bottom_right_y),
                          color, 2)
            text = class_name + " " + "{:.2f}".format(score)
            cv2.putText(imagec, text, (top_left_x, top_left_y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1,
                        cv2.LINE_AA)
        cv2.imshow(file_name, imagec)
        key = cv2.waitKey(0)
        if key == ord('s'):
            os.makedirs('./detections', exist_ok=True)
            cv2.imwrite(os.path.join('./detections', file_name), imagec)
        cv2.destroyAllWindows()

    def _detect(self, outputs, index, image_size):
        """Post-process the model outputs for image ``index`` of a batch.

        Runs ``process_outputs``, ``filter_boxes`` and
        ``non_max_suppression`` in sequence and returns the final
        ``(boxes, box_classes, box_scores)``.
        """
        per_image = [out[index] for out in outputs]
        boxes, box_confidences, box_class_probs = self.process_outputs(
            per_image, image_size)
        filtered_boxes, box_classes, box_scores = self.filter_boxes(
            boxes, box_confidences, box_class_probs)
        return self.non_max_suppression(
            filtered_boxes, box_classes, box_scores)

    def predict(self, folder_path):
        """Detect objects in, and display, every image of a folder.

        Returns:
            Tuple ``(predictions, image_paths)`` where ``predictions`` holds
            one ``(boxes, box_classes, box_scores)`` tuple per image.

        Raises:
            FileNotFoundError: if ``folder_path`` does not exist.
        """
        loaded = self.load_images(folder_path)
        if loaded is None:
            raise FileNotFoundError("Wrong images folder path")
        images, image_paths = loaded
        if not images:
            # Nothing to feed the model.
            return [], image_paths

        pimages, image_shapes = self.preprocess_images(images)
        outputs = self.model.predict(pimages)

        predictions = []
        for i, image in enumerate(images):
            boxes, box_classes, box_scores = self._detect(
                outputs, i, image_shapes[i])
            self.show_boxes(image, boxes, box_classes, box_scores,
                            os.path.basename(image_paths[i]))
            predictions.append((boxes, box_classes, box_scores))
        return predictions, image_paths

    def predict_frame(self, frame):
        """Detect objects in a single image array.

        Returns:
            Tuple ``(boxes, box_classes, box_scores)`` for ``frame``.
        """
        pimages, image_shapes = self.preprocess_images([frame])
        outputs = self.model.predict(pimages)
        return self._detect(outputs, 0, image_shapes[0])
318
+