import os

import requests

from llm_params import LlmParams


class Llm:
    """Small HTTP client that posts a prompt to a ``/completion`` endpoint.

    The endpoint base URL is read from ``params.url``. Judging by the method
    name ``query_llamacpp`` and the request fields, the target is presumably a
    llama.cpp server -- confirm against the deployment.
    """

    # Seconds to wait while establishing the connection. Only the connect
    # phase is bounded; the read phase is left unbounded because generating a
    # long completion can legitimately take a long time.
    _CONNECT_TIMEOUT_S = 10

    # Request configuration; the methods below read its ``url`` attribute.
    params: LlmParams = None

    def __init__(self, params):
        """Store the request parameters.

        Args:
            params: Object exposing a ``url`` attribute holding the base URL
                of the completion server (no trailing ``/completion``).
        """
        # Read from the environment and kept on the instance; the request
        # methods in this class use ``params.url`` rather than this value.
        self.llm_api_endpoint = os.environ.get("LLM_API_ENDPOINT", "")
        self.params = params

    def predict(self, prompt: str = None):
        """Send ``prompt`` to the completion endpoint and return its answer.

        Args:
            prompt: The user prompt. The ``None`` default is kept for
                backward compatibility but is not a usable value.

        Returns:
            The ``content`` field of the JSON response.

        Raises:
            TypeError: If ``prompt`` is not a string.
            requests.RequestException: On connection or HTTP errors.
        """
        return self._complete(prompt)

    def query_llamacpp(self, prompt: str):
        """Send ``prompt`` to the completion endpoint and return its answer.

        Behaves exactly like :meth:`predict`.

        Args:
            prompt: The user prompt.

        Returns:
            The ``content`` field of the JSON response.

        Raises:
            TypeError: If ``prompt`` is not a string.
            requests.RequestException: On connection or HTTP errors.
        """
        return self._complete(prompt)

    def _complete(self, prompt):
        """Post the wrapped prompt to ``<params.url>/completion``."""
        # Fail early with a readable message instead of the opaque
        # "can only concatenate str" error the concatenation would produce.
        if not isinstance(prompt, str):
            raise TypeError(
                f"prompt must be a str, got {type(prompt).__name__}"
            )

        payload = {
            # The prompt is wrapped in [INST] ... [/INST] markers.
            'prompt': ' [INST] ' + prompt + ' [/INST]',
            'temperature': 0.0,
            # NOTE(review): sent as a float; confirm whether the server
            # expects an integer token count here.
            'n_predict': 2500.0,
            'top_p': 0.95,
            'min_p': 0.05,
            'repeat_penalty': 1.2,
            'stop': [],
        }
        response = requests.post(
            url=self.params.url + '/completion',
            json=payload,
            timeout=(self._CONNECT_TIMEOUT_S, None),
        )
        # Surface HTTP-level failures directly rather than as a confusing
        # KeyError / JSON decoding error further down.
        response.raise_for_status()
        return response.json()['content']