Michael Brunzel
committed on
Commit
•
e48403c
1
Parent(s):
d064821
Update generate
Browse files- handler.py +7 -1
handler.py
CHANGED
@@ -23,6 +23,11 @@ class EndpointHandler:
|
|
23 |
}
|
24 |
self.instruction = """Extract the name of the person, the location, the hotel name and the desired date from the following hotel request"""
|
25 |
|
|
|
|
|
|
|
|
|
|
|
26 |
def generate_prompt(
|
27 |
self,
|
28 |
template: str,
|
@@ -55,6 +60,7 @@ class EndpointHandler:
|
|
55 |
parameters = data.pop("parameters", None)
|
56 |
|
57 |
inputs = self.generate_prompt(self.template, self.instruction, inputs)
|
|
|
58 |
# preprocess
|
59 |
self.tokenizer.pad_token_id = (
|
60 |
0 # unk. we want this to be different from the eos token
|
@@ -65,7 +71,7 @@ class EndpointHandler:
|
|
65 |
if parameters is not None:
|
66 |
outputs = self.model.generate(input_ids, **parameters)
|
67 |
else:
|
68 |
-
outputs = self.model.generate(input_ids)
|
69 |
|
70 |
# postprocess the prediction
|
71 |
prediction = self.tokenizer.decode(outputs[0]) #, skip_special_tokens=True)
|
|
|
23 |
}
|
24 |
self.instruction = """Extract the name of the person, the location, the hotel name and the desired date from the following hotel request"""
|
25 |
|
26 |
+
if torch.cuda.is_available():
|
27 |
+
self.device = "cuda"
|
28 |
+
else:
|
29 |
+
self.device = "cpu"
|
30 |
+
|
31 |
def generate_prompt(
|
32 |
self,
|
33 |
template: str,
|
|
|
60 |
parameters = data.pop("parameters", None)
|
61 |
|
62 |
inputs = self.generate_prompt(self.template, self.instruction, inputs)
|
63 |
+
input_ids = input_ids.to(self.device)
|
64 |
# preprocess
|
65 |
self.tokenizer.pad_token_id = (
|
66 |
0 # unk. we want this to be different from the eos token
|
|
|
71 |
if parameters is not None:
|
72 |
outputs = self.model.generate(input_ids, **parameters)
|
73 |
else:
|
74 |
+
outputs = self.model.generate(input_ids, max_new_tokens=64)
|
75 |
|
76 |
# postprocess the prediction
|
77 |
prediction = self.tokenizer.decode(outputs[0]) #, skip_special_tokens=True)
|