sahilsuneja committed on
Commit
5a5b6fb
·
verified ·
1 Parent(s): 7bb719b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +28 -0
README.md CHANGED
@@ -124,3 +124,31 @@ curl 127.0.0.1:8080/generate_stream \
124
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
125
  -H 'Content-Type: application/json'
126
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
125
  -H 'Content-Type: application/json'
126
  ```
127
+
128
+ ### Use in vLLM
129
+ ```python
+ from vllm import LLM, SamplingParams
130
+
131
+ # Sample prompts.
132
+ prompts = [
133
+ "The president of the United States is",
134
+ ]
135
+ # Create a sampling params object.
136
+ sampling_params = SamplingParams(temperature=0.0)
137
+
138
+ # Create an LLM.
139
+ llm = LLM(
140
+ model="/path/to/Meta-Llama-3-70B-Instruct",
141
+ tensor_parallel_size=4,
142
+ speculative_model="/path/to/llama3-70b-accelerator",
143
+ speculative_draft_tensor_parallel_size=1,
144
+ use_v2_block_manager=True,
145
+ )
146
+ # Generate texts from the prompts. The output is a list of RequestOutput objects
147
+ # that contain the prompt, generated text, and other information.
148
+ outputs = llm.generate(prompts, sampling_params)
149
+ # Print the outputs.
150
+ for output in outputs:
151
+ prompt = output.prompt
152
+ generated_text = output.outputs[0].text
153
+ print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
154
+ ```