Is streaming inference supported?

#9
by weege007 - opened

Use `TextIteratorStreamer` like this:

# First attempt: stream Molmo generation with TextIteratorStreamer.
# NOTE(review): this version FAILS — see the traceback below. `stop_strings`
# requires `tokenizer=...` to be passed to `generate` as well.
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig,TextIteratorStreamer
from PIL import Image
import requests
from time import perf_counter

# NOTE: the second assignment shadows the first; only Molmo-7B-D is used.
model_id = 'allenai/MolmoE-1B-0924'
model_id = 'allenai/Molmo-7B-D-0924'

# load the processor (remote code is required for Molmo)
processor = AutoProcessor.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto'
)

# load the model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto'
)

# per-chunk latencies; the first entry also includes preprocessing + prefill
times=[]
start_time = perf_counter()

# process the image and text into model inputs
inputs = processor.process(
    images=[Image.open(requests.get("https://picsum.photos/id/237/536/354", stream=True).raw)],
    text="Describe this image.",
    #padding=True,
    #return_tensors="pt",
)

# move inputs to the correct device and make a batch of size 1
inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}


# Inference: Generation of the output with TextStreamer
streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
# Use Thread to run generation in background
# Otherwise, the process is blocked until generation is complete
# and no streaming effect can be observed.
from threading import Thread
# BUG: `stop_strings` is passed but `tokenizer=processor.tokenizer` is not;
# transformers raises "we could not locate a tokenizer" (traceback below).
generation_kwargs = dict(**inputs,max_new_tokens=200, stop_strings="<|endoftext|>",streamer=streamer)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

# Consume the streamer on the main thread, timing each received chunk.
generated_text = ""
for new_text in streamer:
    print(new_text,end="")
    times.append(perf_counter() - start_time)
    generated_text += new_text
    start_time = perf_counter()
print(generated_text)

print("times",times)
print("total cost:",sum(times))

This raises the following error:

Exception in thread Thread-11 (generate):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 1957, in generate
    prepared_stopping_criteria = self._get_stopping_criteria(
  File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 948, in _get_stopping_criteria
    raise ValueError(
ValueError: There are one or more stop strings, either in the arguments to `generate` or in the model's generation config, but we could not locate a tokenizer. When generating with stop strings, you must pass the model's tokenizer to the `tokenizer` argument of `generate`.

Yeah! This version works:

# Working version: stream Molmo generation with TextIteratorStreamer by
# calling `generate_from_batch` and passing the tokenizer explicitly
# (required because a `stop_strings` stopping criterion is used).
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig,TextIteratorStreamer
from PIL import Image
import requests
from threading import Thread
from time import perf_counter

# The second assignment shadows the first; Molmo-7B-D is the model used.
model_id = 'allenai/MolmoE-1B-0924'
model_id = 'allenai/Molmo-7B-D-0924'

# Processor and model both need trust_remote_code for Molmo.
processor = AutoProcessor.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto',
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto',
)

# Per-chunk latencies; the first entry also covers preprocessing + prefill.
chunk_times = []
t_prev = perf_counter()

# Download one image and build the multimodal inputs with the prompt.
image = Image.open(requests.get("https://picsum.photos/id/237/536/354", stream=True).raw)
inputs = processor.process(
    images=[image],
    text="Describe this image.",
)

# Batch of size 1, placed on the model's device.
inputs = {key: tensor.to(model.device).unsqueeze(0) for key, tensor in inputs.items()}

# Stream decoded text as tokens are produced. Generation runs on a worker
# thread so the main thread can iterate the streamer concurrently —
# otherwise the call would block until generation finished.
streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
gen_config = GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>")
worker = Thread(
    target=model.generate_from_batch,
    args=(inputs, gen_config),
    kwargs=dict(streamer=streamer, tokenizer=processor.tokenizer),
)
worker.start()

# Consume chunks as they arrive, timing the gap between successive chunks.
pieces = []
for chunk in streamer:
    print(chunk, end="")
    chunk_times.append(perf_counter() - t_prev)
    pieces.append(chunk)
    t_prev = perf_counter()
print("".join(pieces))

print("times", chunk_times)
print("total cost:", sum(chunk_times))

Run on an A100 40 GB on Colab — output:

 This image captures a young black Labrador puppy, likely around six months old, sitting on a weathered wooden deck. The puppy's sleek, short fur is entirely black, including its nose, eyes, and ears, which are slightly floppy. The dog is positioned in the center of the frame, looking up directly at the camera with a curious and attentive expression. Its front paws are visible, with one slightly tucked under its body, while its back paws are hidden from view. The wooden deck beneath the puppy is made of light brown planks with visible knots and signs of wear, adding a rustic charm to the scene. The overall composition is simple yet striking, with the puppy's glossy black coat contrasting beautifully against the light wooden background. This image captures a young black Labrador puppy, likely around six months old, sitting on a weathered wooden deck. The puppy's sleek, short fur is entirely black, including its nose, eyes, and ears, which are slightly floppy. The dog is positioned in the center of the frame, looking up directly at the camera with a curious and attentive expression. Its front paws are visible, with one slightly tucked under its body, while its back paws are hidden from view. The wooden deck beneath the puppy is made of light brown planks with visible knots and signs of wear, adding a rustic charm to the scene. The overall composition is simple yet striking, with the puppy's glossy black coat contrasting beautifully against the light wooden background.
times [4.1184672450000335, 0.1259763909999947, 0.04029189400000632, 0.04143443999998908, 0.0393988299999819, 0.039091404000032526, 0.038795281000034265, 0.03873853400000371, 0.03956079600004614, 0.04036988600000768, 0.03858942300007584, 0.03953495199993995, 0.03905083100005413, 0.03855302900001334, 0.0398302880000756, 0.04022023699997135, 0.039297477000104664, 0.040675269999951524, 0.04099108700006582, 0.039244918999997935, 0.03885962599997583, 0.04036941399999705, 0.03898160699998243, 0.04287940599999729, 0.03891989900000681, 0.0390602619999072, 0.04026413600001888, 0.04081131899999946, 0.03898506799998813, 0.04068021300008695, 0.038749141000039344, 0.038894211000069845, 0.04486314300004324, 0.04033141299998988, 0.03950026699999398, 0.03931632000001173, 0.03940672400005951, 0.039351677000013296, 0.0400197849999131, 0.038800519000005806, 0.038625899000066966, 0.03879732400002922, 0.03844953299994813, 0.038615720000052534, 0.046914319000052274, 0.04079339000008986, 0.039373402000023816, 0.03913330300008511, 0.040475439000033475, 0.04238029600003301, 0.04038120399991385, 0.03919793299996854, 0.03985229199997775, 0.03862109700003202, 0.039071728999942934, 0.03986469099993428, 0.0394440919999397, 0.03875089600001047, 0.03882110200004263, 0.039257496999994146, 0.0389652589999514, 0.04003696300003412, 0.03978470599997763, 0.03901883500009262, 0.03892853799993645, 0.038981267999929514, 0.03908283099997334, 0.03991614699998536, 0.03905695899993589, 0.03920949099995141, 0.03937203300006331, 0.038842425999973784, 0.03936440400002539, 0.03967475799993281, 0.039289498000016465, 0.039127414000063254, 0.039001273000053516, 0.039219174999971074, 0.03897438499996042, 0.04117561600003228, 0.03959038599998621, 0.03918464799994581, 0.039056994000020495, 0.039536552000072334, 0.03944160499997906, 0.040302065999981096, 0.039465673000108836, 0.03936466300001484, 0.03982358300004307, 0.039110337000010986, 0.03901152099990668, 0.0401586010000301, 0.03921912300006625, 0.03947212100001707, 
0.04689344999997047, 0.04010179099998368, 0.03985182299993539, 0.039740199999982906, 0.03975475099991854, 0.039022434999992583, 0.039878007999959664, 0.040033833000052255, 0.04040963999989344, 0.039808446000051845, 0.039250757999980124, 0.03930387200000496, 0.039651883000033195, 0.03944345299998986, 0.04004076600006101, 0.04078536500003338, 0.040400193999971634, 0.03942205599992121, 0.039750957000023845, 0.04085607100000743, 0.039851016000056916, 0.039828095999951074, 0.03928377599993382, 0.03984419200003231, 0.03994267600000967, 0.040280590000065786, 0.03968490499994459, 0.03918449299999338, 0.04073014799996599, 0.03968114200006312, 0.040680010999949445, 0.04028002400002606, 0.0399213809999992, 0.03987532500002544, 0.039965294000012364, 0.039911655000082646, 0.042511125000032735, 0.04208850199995595, 0.04139655199992376, 0.03982710599996153, 0.04076774899999691, 0.03946689299993977, 0.04186883799991392, 0.03983607899999697, 0.039960805000077926, 0.03999595899995256, 0.04062519199999315, 0.03939121600001272, 0.040155736999963665, 0.03996564400006264, 0.03962665899996409, 0.040226764000067305, 0.04065318400000706, 0.04200165700001435, 0.03946603099996082, 0.040370285999983935, 0.0024612040000420166]
total cost: 10.149745032000396
chrisc36 changed discussion status to closed

Sign up or log in to comment