Spaces:
Running
on
T4
Running
on
T4
File size: 1,199 Bytes
b24d496 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import os
import requests
from llm_params import LlmParams
class Llm:
    """Small HTTP client that sends prompts to a ``/completion`` endpoint.

    The endpoint URL is built from ``params.url``. The request shape
    (``n_predict``, ``min_p``, ``repeat_penalty`` ...) and the method name
    ``query_llamacpp`` suggest a llama.cpp server -- confirm against the
    deployment.
    """

    # Class-level placeholder only; every instance overwrites it in __init__.
    # The annotation is quoted so defining the class does not evaluate the name.
    params: "LlmParams" = None

    def __init__(self, params):
        """Store the connection parameters.

        Args:
            params: Object exposing a ``url`` attribute; ``'/completion'`` is
                appended to it for every request.
        """
        # Kept as a public attribute for backward compatibility; the request
        # methods in this class use ``params.url``, not this value.
        self.llm_api_endpoint = os.environ.get("LLM_API_ENDPOINT", "")
        self.params = params

    def predict(self, prompt: str = None):
        """Return the completion text generated for ``prompt``.

        Args:
            prompt: Instruction text; it is wrapped in ``[INST] ... [/INST]``
                before being sent.

        Returns:
            The ``'content'`` field of the server's JSON response.

        Raises:
            TypeError: If ``prompt`` is ``None`` (including when omitted).
        """
        return self._complete(prompt)

    def query_llamacpp(self, prompt: str):
        """Return the completion text generated for ``prompt``.

        Behaves exactly like :meth:`predict`; both names are kept so existing
        callers of either one continue to work.

        Raises:
            TypeError: If ``prompt`` is ``None``.
        """
        return self._complete(prompt)

    def _complete(self, prompt):
        """POST ``prompt`` to the completion endpoint and return its content."""
        # Fail before any network traffic with a clear message instead of the
        # opaque "can only concatenate str" error from the line below.
        if prompt is None:
            raise TypeError("prompt must be a str, not None")

        payload = {
            'prompt': ' [INST] ' + prompt + ' [/INST]',
            'temperature': 0.0,
            # Token count: sent as an integer rather than a float.
            'n_predict': 2500,
            'top_p': 0.95,
            'min_p': 0.05,
            'repeat_penalty': 1.2,
            'stop': [],
        }
        # NOTE(review): no timeout and no HTTP status check, matching the
        # previous behaviour; an error response without a 'content' key
        # surfaces as KeyError here.
        response = requests.post(
            url=self.params.url + '/completion',
            json=payload,
        )
        return response.json()['content']