jxu124 committed on
Commit
2b54ad7
1 Parent(s): f7a3ffd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -20
README.md CHANGED
@@ -9,9 +9,13 @@ language:
9
 
10
  TiO is an Interactive Visual Grounding Model for Disambiguation. (WIP)
11
 
12
- ## Online / offline Demo
 
 
 
13
 
14
  ```python
 
15
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
16
 
17
  model_id = "jxu124/TiO"
@@ -25,25 +29,32 @@ model = AutoModel.from_pretrained(
25
  )
26
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
27
  image_processor = AutoImageProcessor.from_pretrained(model_id)
28
- # setup gradio demo
29
- model.get_gradio_demo(tokenizer, image_processor).\
30
- queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)
31
  ```
32
 
33
  ## Mini-Example
34
  ```python
 
35
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
 
 
 
 
 
 
 
 
 
 
 
 
36
  from PIL import Image
37
  from io import BytesIO
38
  import torch
39
  import requests
40
 
41
- # Load model, tokenizer, image_processor
42
- tokenizer = AutoTokenizer.from_pretrained("jxu124/TiO", use_fast=False)
43
- image_processor = AutoImageProcessor.from_pretrained("jxu124/TiO")
44
- model = AutoModel.from_pretrained("jxu124/TiO", trust_remote_code=True)
45
- model = model.to(torch.float16).cuda() # It would be faster.
46
-
47
  # Prepare example
48
  image = Image.open(BytesIO(requests.get("http://images.cocodataset.org/val2014/COCO_val2014_000000429913.jpg").content))
49
  text = """\
@@ -64,25 +75,28 @@ print(tokenizer.batch_decode(gen, skip_special_tokens=True).replace("not yet.",
64
 
65
  Guesser(grounding):
66
  ```python
67
- text = """ #instruction: which region does the context describe? \n #context: \"\
 
 
68
  human: look that man in white!
69
  agent: is he the one who just threw the ball?
70
- human: yes. I mean the pitcher.\"
71
- """
72
  ```
73
 
74
  Questioner(question generation):
75
  ```python
76
- text = """ #instruction: guess what I want? \n #context: \"\
77
- human: look that man in white! \"
78
- """
 
79
  ```
80
 
81
  Oracle(answering):
82
  ```python
83
- text = """ #instruction: answer the question based on the region. \n #context: \"\
 
 
84
  agent: look that man in white!
85
- human: is he the one who just threw the ball? \"
86
- #region: <bin_847> <bin_319> <bin_923> <bin_467>
87
- """
88
  ```
 
9
 
10
  TiO is an Interactive Visual Grounding Model for Disambiguation. (WIP)
11
 
12
+ ## Online / Offline Demo
13
+
14
+ - [Colab Online Demo](https://colab.research.google.com/drive/195eDITKi6dahnVz8Cum91sNUCF_lFle8?usp=sharing) - runs on the free T4 GPU available on Google Colab.
15
+ - Gradio Offline Demo:
16
 
17
  ```python
18
+ import os; os.system("pip3 install transformers accelerate bitsandbytes gradio fire")
19
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
20
 
21
  model_id = "jxu124/TiO"
 
29
  )
30
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
31
  image_processor = AutoImageProcessor.from_pretrained(model_id)
32
+
33
+ # ---- setup gradio demo ----
34
+ model.get_gradio_demo(tokenizer, image_processor).queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)
35
  ```
36
 
37
  ## Mini-Example
38
  ```python
39
+ import os; os.system("pip3 install transformers accelerate bitsandbytes gradio fire")
40
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
41
+
42
+ model_id = "jxu124/TiO"
43
+ model = AutoModel.from_pretrained(
44
+ model_id,
45
+ trust_remote_code=True,
46
+ torch_dtype=torch.float16,
47
+ device_map='cuda'
48
+ )
49
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
50
+ image_processor = AutoImageProcessor.from_pretrained(model_id)
51
+
52
+ # ---- mini example ----
53
  from PIL import Image
54
  from io import BytesIO
55
  import torch
56
  import requests
57
 
 
 
 
 
 
 
58
  # Prepare example
59
  image = Image.open(BytesIO(requests.get("http://images.cocodataset.org/val2014/COCO_val2014_000000429913.jpg").content))
60
  text = """\
 
75
 
76
  Guesser(grounding):
77
  ```python
78
+ text = """\
79
+ #instruction: which region does the context describe?
80
+ #context:
81
  human: look that man in white!
82
  agent: is he the one who just threw the ball?
83
+ human: yes. I mean the pitcher."""
 
84
  ```
85
 
86
  Questioner(question generation):
87
  ```python
88
+ text = """\
89
+ #instruction: guess what I want?
90
+ #context:
91
+ human: look that man in white!"""
92
  ```
93
 
94
  Oracle(answering):
95
  ```python
96
+ text = """\
97
+ #instruction: answer the question based on the region.
98
+ #context:
99
  agent: look that man in white!
100
+ human: is he the one who just threw the ball?
101
+ #region: <bin_847> <bin_319> <bin_923> <bin_467>"""
 
102
  ```