{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7862\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from transformers import AutoProcessor, AutoModelForCausalLM\n",
    "import gradio as gr\n",
    "import torch\n",
    "\n",
    "# Load the processor and model\n",
    "processor = AutoProcessor.from_pretrained(\"microsoft/git-base\")\n",
    "model = AutoModelForCausalLM.from_pretrained(\"./\")\n",
    "\n",
    "def predict(image):\n",
    "    try:\n",
    "        # Prepare the image using the processor\n",
    "        inputs = processor(images=image, return_tensors=\"pt\")\n",
    "\n",
    "        # Move inputs to the appropriate device\n",
    "        device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "        inputs = {key: value.to(device) for key, value in inputs.items()}\n",
    "        model.to(device)\n",
    "\n",
    "        # Generate the caption\n",
    "        outputs = model.generate(**inputs)\n",
    "\n",
    "        # Decode the generated caption\n",
    "        caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]\n",
    "\n",
    "        return caption\n",
    "\n",
    "    except Exception as e:\n",
    "        print(\"Error during prediction:\", str(e))\n",
    "        return \"Error: \" + str(e)\n",
    "\n",
    "# https://www.gradio.app/guides\n",
    "with gr.Blocks() as demo:\n",
    "    image = gr.Image(type=\"pil\")\n",
    "    predict_btn = gr.Button(\"Predict\", variant=\"primary\")\n",
    "    output = gr.Label(label=\"Generated Caption\")\n",
    "\n",
    "    inputs = [image]\n",
    "    outputs = [output]\n",
    "\n",
    "    predict_btn.click(predict, inputs=inputs, outputs=outputs)\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    demo.launch()  # Local machine only\n",
    "    # demo.launch(server_name=\"0.0.0.0\")  # LAN access to local machine\n",
    "    # demo.launch(share=True)  # Public access to local machine\n"
   ]
  },
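  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch of calling `predict` directly, without going through the Gradio UI. It assumes the cell above has already been run (so `processor`, `model`, and `predict` exist) and uses a hypothetical local image path `test.jpg`; substitute any image file on disk."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from PIL import Image\n",
    "\n",
    "# Hypothetical test image path; replace with any local image file\n",
    "test_image = Image.open(\"test.jpg\").convert(\"RGB\")\n",
    "\n",
    "# Reuse the predict() helper defined above to caption the image directly\n",
    "print(predict(test_image))"
   ]
  }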
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}