{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-intro",
   "metadata": {},
   "source": [
    "# IceVision + Gradio: Fridge Objects detector demo\n",
    "\n",
    "This notebook rebuilds a RetinaNet (ResNet-50 FPN) detector with IceVision, restores previously saved weights from the `model` checkpoint and serves predictions through a Gradio web app.\n",
    "\n",
    "No training happens here: the checkpoint must already exist before the notebook is run (fastai's `Learner.load('model')` looks for `models/model.pth` by default)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee7e0c23-3fa5-4547-8598-7df27a3876c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `icevision.all` supplies the unqualified names used below (`parsers`, `models`, `tfms`, `Dataset`, `COCOMetric`, ...)\n",
    "from icevision.all import *\n",
    "import icedata\n",
    "import PIL, requests\n",
    "import torch\n",
    "from torchvision import transforms\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-data",
   "metadata": {},
   "source": [
    "## Data\n",
    "\n",
    "The dataset is only needed to recover the class map and to build the data loaders that the fastai learner requires."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "646cc218-f7de-4f32-a3d9-fccdc9b54592",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the dataset\n",
    "url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n",
    "dest_dir = \"fridge\"\n",
    "data_dir = icedata.load_data(url, dest_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96184ca0-0b0a-4a20-8ab9-30dee6096588",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the parser\n",
    "parser = parsers.VOCBBoxParser(\n",
    "    annotations_dir=data_dir / \"odFridgeObjects/annotations\",\n",
    "    images_dir=data_dir / \"odFridgeObjects/images\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfa20f76-4970-479a-9497-871fe4cfd170",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse annotations to create records\n",
    "train_records, valid_records = parser.parse()\n",
    "parser.class_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26d4f2f7-db51-413c-838f-f80c5898ab52",
   "metadata": {},
   "outputs": [],
   "source": [
    "class_map = parser.class_map"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-model",
   "metadata": {},
   "source": [
    "## Model and weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "007b2e97-d546-4178-84e7-d4fe597f3731",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extra keyword arguments forwarded to the model constructor (none for this model)\n",
    "extra_args = {}\n",
    "model_type = models.torchvision.retinanet\n",
    "backbone = model_type.backbones.resnet50_fpn\n",
    "# Instantiate the model\n",
    "model = model_type.model(\n",
    "    backbone=backbone(pretrained=True),\n",
    "    num_classes=len(class_map),\n",
    "    **extra_args,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b664cbf-3ab0-46df-a9d0-c4eb5c3c026d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Transforms\n",
    "# NOTE: the previous comment here cited EfficientDet's \"inputs divisible by 128\" rule,\n",
    "# but the model built above is RetinaNet. Presumably 384 is the size the saved weights\n",
    "# were trained with -- confirm before changing it.\n",
    "image_size = 384\n",
    "train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])\n",
    "valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])\n",
    "# Datasets\n",
    "train_ds = Dataset(train_records, train_tfms)\n",
    "valid_ds = Dataset(valid_records, valid_tfms)\n",
    "# Data Loaders\n",
    "train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)\n",
    "valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)\n",
    "# The learner is only used in the next cell to restore the checkpoint;\n",
    "# inference further down calls `model` and `valid_tfms` directly.\n",
    "metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]\n",
    "learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bddb248-215d-4998-9d90-14ea6989c236",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restore the saved weights into `model` through the learner that wraps it\n",
    "learn = learn.load('model')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-demo",
   "metadata": {},
   "source": [
    "## Gradio demo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "745315f6-8aa5-486e-a7bc-e11348bec6a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single source of truth for the slider default and the fallback inside `show_preds`\n",
    "DEFAULT_DETECTION_THRESHOLD = 0.5\n",
    "\n",
    "\n",
    "def show_preds(input_image, display_label, display_bbox, detection_threshold):\n",
    "    \"\"\"Run the detector on one image and return the image with the predictions drawn on it.\n",
    "\n",
    "    Args:\n",
    "        input_image: image as a NumPy array that PIL can read in 'RGB' mode\n",
    "            (assumed to be what Gradio's image input delivers -- TODO confirm).\n",
    "        display_label: whether to draw the class labels.\n",
    "        display_bbox: whether to draw the bounding boxes.\n",
    "        detection_threshold: score threshold forwarded to `end2end_detect`.\n",
    "            A value of 0 is replaced by `DEFAULT_DETECTION_THRESHOLD`\n",
    "            (behaviour kept from the original code; NOTE(review): this makes the\n",
    "            slider's minimum position act like the default).\n",
    "\n",
    "    Returns:\n",
    "        The annotated image stored under the `'img'` key of the prediction dict.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if no image was provided.\n",
    "    \"\"\"\n",
    "    if input_image is None:\n",
    "        # Fail with a readable message instead of an obscure error from inside PIL.\n",
    "        raise ValueError('No input image provided')\n",
    "\n",
    "    if detection_threshold == 0:\n",
    "        detection_threshold = DEFAULT_DETECTION_THRESHOLD\n",
    "\n",
    "    img = PIL.Image.fromarray(input_image, 'RGB')\n",
    "\n",
    "    pred_dict = model_type.end2end_detect(\n",
    "        img,\n",
    "        valid_tfms,\n",
    "        model,\n",
    "        class_map=class_map,\n",
    "        detection_threshold=detection_threshold,\n",
    "        display_label=display_label,\n",
    "        display_bbox=display_bbox,\n",
    "        return_img=True,\n",
    "        font_size=16,\n",
    "        label_color=\"#FF59D6\",\n",
    "    )\n",
    "\n",
    "    return pred_dict['img']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63ac7fab-2068-4dbc-a464-0551b6fc12b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Demo configuration\n",
    "# \"image\": pick an image from the local drive; \"webcam\": grab an image from the webcam\n",
    "INPUT_SOURCE = \"image\"\n",
    "# With INPUT_SOURCE = \"webcam\", True processes a continuous image stream from the webcam\n",
    "LIVE = False\n",
    "\n",
    "display_chkbox_label = gr.inputs.Checkbox(label=\"Label\", default=True)\n",
    "display_chkbox_box = gr.inputs.Checkbox(label=\"Box\", default=True)\n",
    "\n",
    "detection_threshold_slider = gr.inputs.Slider(\n",
    "    minimum=0,\n",
    "    maximum=1,\n",
    "    step=0.1,\n",
    "    default=DEFAULT_DETECTION_THRESHOLD,\n",
    "    label=\"Detection Threshold\",\n",
    ")\n",
    "\n",
    "outputs = gr.outputs.Image(type=\"pil\")\n",
    "\n",
    "gr_interface = gr.Interface(\n",
    "    fn=show_preds,\n",
    "    inputs=[INPUT_SOURCE, display_chkbox_label, display_chkbox_box, detection_threshold_slider],\n",
    "    outputs=outputs,\n",
    "    title='IceApp - Fridge Objects',\n",
    "    live=LIVE,\n",
    ")\n",
    "\n",
    "# share=True also publishes the demo on a temporary public gradio.app URL;\n",
    "# set it to False to keep the demo reachable from the local machine only.\n",
    "gr_interface.launch(inline=False, share=True, debug=True)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}