{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ee7e0c23-3fa5-4547-8598-7df27a3876c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "from icevision.all import *\n",
    "import icedata\n",
    "import PIL, requests\n",
    "import torch\n",
    "from torchvision import transforms\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "646cc218-f7de-4f32-a3d9-fccdc9b54592",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the dataset\n",
    "url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n",
    "dest_dir = \"fridge\"\n",
    "data_dir = icedata.load_data(url, dest_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "96184ca0-0b0a-4a20-8ab9-30dee6096588",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the parser\n",
    "parser = parsers.VOCBBoxParser(annotations_dir=data_dir / \"odFridgeObjects/annotations\", images_dir=data_dir / \"odFridgeObjects/images\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "dfa20f76-4970-479a-9497-871fe4cfd170",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fc8e676815314038a40c884c8c7f5b67",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[1mINFO    \u001b[0m\u001b[1m\u001b[0m - \u001b[1m\u001b[34m\u001b[1mAutofixing records\u001b[0m\u001b[1m\u001b[34m\u001b[0m\u001b[1m\u001b[0m | \u001b[36micevision.parsers.parser\u001b[0m:\u001b[36mparse\u001b[0m:\u001b[36m122\u001b[0m\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3db4bc1ae388495eb3b62289459a5c00",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<ClassMap: {'background': 0, 'carton': 1, 'milk_bottle': 2, 'can': 3, 'water_bottle': 4}>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse annotations to create records\n",
    "train_records, valid_records = parser.parse()\n",
    "parser.class_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "26d4f2f7-db51-413c-838f-f80c5898ab52",
   "metadata": {},
   "outputs": [],
   "source": [
    "class_map = parser.class_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "007b2e97-d546-4178-84e7-d4fe597f3731",
   "metadata": {},
   "outputs": [],
   "source": [
    "extra_args = {}\n",
    "model_type = models.torchvision.retinanet\n",
    "backbone = model_type.backbones.resnet50_fpn\n",
    "# Instantiate the model\n",
    "model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7b664cbf-3ab0-46df-a9d0-c4eb5c3c026d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Transforms\n",
    "# size is set to 384 because EfficientDet requires its inputs to be divisible by 128\n",
    "image_size = 384\n",
    "train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])\n",
    "valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])\n",
    "# Datasets\n",
    "train_ds = Dataset(train_records, train_tfms)\n",
    "valid_ds = Dataset(valid_records, valid_tfms)\n",
    "# Data Loaders\n",
    "train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)\n",
    "valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)\n",
    "metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]\n",
    "learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7bddb248-215d-4998-9d90-14ea6989c236",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/dnth/anaconda3/envs/icevision-gradio/lib/python3.8/site-packages/fastai/learner.py:56: UserWarning: Saved filed doesn't contain an optimizer state.\n",
      "  elif with_opt: warn(\"Saved filed doesn't contain an optimizer state.\")\n"
     ]
    }
   ],
   "source": [
    "learn = learn.load('model')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "745315f6-8aa5-486e-a7bc-e11348bec6a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_preds(input_image, display_label, display_bbox, detection_threshold):\n",
    "\n",
    "    if detection_threshold==0: detection_threshold=0.5\n",
    "\n",
    "    img = PIL.Image.fromarray(input_image, 'RGB')\n",
    "\n",
    "    pred_dict  = model_type.end2end_detect(img, valid_tfms, model, class_map=class_map, detection_threshold=detection_threshold,\n",
    "                                           display_label=display_label, display_bbox=display_bbox, return_img=True, \n",
    "                                           font_size=16, label_color=\"#FF59D6\")\n",
    "\n",
    "    return pred_dict['img']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63ac7fab-2068-4dbc-a464-0551b6fc12b2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7860/\n",
      "Running on public URL: https://11839.gradio.app\n",
      "\n",
      "This share link will expire in 72 hours. To get longer links, send an email to: support@gradio.app\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/dnth/anaconda3/envs/icevision-gradio/lib/python3.8/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at  ../aten/src/ATen/native/TensorShape.cpp:2157.)\n",
      "  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]\n"
     ]
    }
   ],
   "source": [
    "# display_chkbox = gr.inputs.CheckboxGroup([\"Label\", \"BBox\"], label=\"Display\", default=True)\n",
    "display_chkbox_label = gr.inputs.Checkbox(label=\"Label\", default=True)\n",
    "display_chkbox_box = gr.inputs.Checkbox(label=\"Box\", default=True)\n",
    "\n",
    "detection_threshold_slider = gr.inputs.Slider(minimum=0, maximum=1, step=0.1, default=0.5, label=\"Detection Threshold\")\n",
    "\n",
    "outputs = gr.outputs.Image(type=\"pil\")\n",
    "\n",
    "# Option 1: Get an image from local drive\n",
    "gr_interface = gr.Interface(fn=show_preds, inputs=[\"image\", display_chkbox_label, display_chkbox_box,  detection_threshold_slider], outputs=outputs, title='IceApp - COCO')\n",
    "\n",
    "# #  Option 2: Grab an image from a webcam\n",
    "# gr_interface = gr.Interface(fn=show_preds, inputs=[\"webcam\", display_chkbox_label, display_chkbox_box,  detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=False)\n",
    "\n",
    "# #  Option 3: Continuous image stream from the webcam\n",
    "# gr_interface = gr.Interface(fn=show_preds, inputs=[\"webcam\", display_chkbox_label, display_chkbox_box,  detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=True)\n",
    "\n",
    "\n",
    "gr_interface.launch(inline=False, share=True, debug=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "727a3589-364b-4bfd-9c32-bef5ebe34dbe",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}