Update README.md
Browse files
README.md
CHANGED
@@ -42,7 +42,7 @@ text = "Describe this image."
|
|
42 |
# process the image and text
|
43 |
inputs = processor.process(
|
44 |
images=[Image.open(requests.get(image_url, stream=True).raw)],
|
45 |
-
text=
|
46 |
)
|
47 |
|
48 |
# move inputs to the correct device and make a batch of size 1
|
@@ -50,7 +50,6 @@ inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
|
|
50 |
inputs["images"] = inputs["images"].to(model.dtype)
|
51 |
|
52 |
# generate output; maximum 200 new tokens; stop generation when <|endoftext|> is generated
|
53 |
-
# with torch.autocast(device_type="cuda", enabled=True, dtype=torch.bfloat16):
|
54 |
output = model.generate_from_batch(
|
55 |
inputs,
|
56 |
GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
|
@@ -73,19 +72,20 @@ print(generated_text)
|
|
73 |
image_url = "http://images.cocodataset.org/train2017/000000411975.jpg"
|
74 |
text = "How many people are there on the baseball field in the picture??"
|
75 |
##INT4:
|
76 |
-
##
|
77 |
|
78 |
##FP32:
|
79 |
-
##
|
80 |
|
81 |
|
82 |
image_url = "https://intelcorp.scene7.com/is/image/intelcorp/processor-overview-framed-badge:1920-1080?wid=480&hei=270"
|
83 |
text = "Which company does this image represent?"
|
84 |
##INT4:
|
85 |
-
## The image
|
86 |
|
87 |
##FP32:
|
88 |
-
## The image
|
|
|
89 |
```
|
90 |
|
91 |
### Generate the model
|
|
|
42 |
# process the image and text
|
43 |
inputs = processor.process(
|
44 |
images=[Image.open(requests.get(image_url, stream=True).raw)],
|
45 |
+
text=text
|
46 |
)
|
47 |
|
48 |
# move inputs to the correct device and make a batch of size 1
|
|
|
50 |
inputs["images"] = inputs["images"].to(model.dtype)
|
51 |
|
52 |
# generate output; maximum 200 new tokens; stop generation when <|endoftext|> is generated
|
|
|
53 |
output = model.generate_from_batch(
|
54 |
inputs,
|
55 |
GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
|
|
|
72 |
image_url = "http://images.cocodataset.org/train2017/000000411975.jpg"
|
73 |
text = "How many people are there on the baseball field in the picture??"
|
74 |
##INT4:
|
75 |
+
## Counting the <points x1="46.5" y1="37.1" x2="58.6" y2="48.3" x3="76.5" y3="33.0" alt="people on the baseball field">people on the baseball field</points> shows a total of 3.
|
76 |
|
77 |
##FP32:
|
78 |
+
## Counting the <points x1="46.5" y1="37.6" x2="58.5" y2="49.0" x3="76.0" y3="33.1" alt="people on the baseball field">people on the baseball field</points> shows a total of 3.
|
79 |
|
80 |
|
81 |
image_url = "https://intelcorp.scene7.com/is/image/intelcorp/processor-overview-framed-badge:1920-1080?wid=480&hei=270"
|
82 |
text = "Which company does this image represent?"
|
83 |
##INT4:
|
84 |
+
## The image represents Intel, a well-known technology company. The logo features the text "Intel" in white lowercase letters, followed by "INSIDE" in uppercase letters. This iconic logo design is instantly recognizable and has been a symbol of Intel's brand for many years.
|
85 |
|
86 |
##FP32:
|
87 |
+
## The image represents Intel, a well-known technology company. The logo features the text "Intel" in white lowercase letters, with "INSIDE" in uppercase letters below it. This iconic logo design is instantly recognizable and associated with Intel's brand in the computer industry.
|
88 |
+
|
89 |
```
|
90 |
|
91 |
### Generate the model
|