kirankunapuli
committed on
Commit
•
c0ddfd5
1
Parent(s):
969a44b
Update app.py to use cache
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ def get_response(input_text: str) -> str:
|
|
36 |
return_tensors="pt",
|
37 |
).to(device)
|
38 |
|
39 |
-
outputs = model.generate(**inputs, max_new_tokens=256)
|
40 |
output = tokenizer.batch_decode(outputs)[0]
|
41 |
response_pattern = re.compile(r"### Response:\n(.*?)<eos>", re.DOTALL)
|
42 |
response_match = response_pattern.search(output)
|
|
|
36 |
return_tensors="pt",
|
37 |
).to(device)
|
38 |
|
39 |
+
outputs = model.generate(**inputs, max_new_tokens=256, use_cache=True)
|
40 |
output = tokenizer.batch_decode(outputs)[0]
|
41 |
response_pattern = re.compile(r"### Response:\n(.*?)<eos>", re.DOTALL)
|
42 |
response_match = response_pattern.search(output)
|