Update services/strategy.py
services/strategy.py  (+18 −7)
@@ -65,13 +65,24 @@ class BestOfN(GenerationStrategy):
             output = generator.model.generate(input_ids, **model_kwargs)
             response = generator.tokenizer.decode(output[0], skip_special_tokens=True)
 
+
+            # Simple inference example
+            prm_output = llm(
+                "<|system|>\n{system_message}</s>\n<|user|>\n{response}</s>\n<|assistant|>",  # Prompt
+                max_tokens=512,  # Generate up to 512 tokens
+                stop=["</s>"],  # Example stop token - not necessarily correct for this specific model! Please check before using.
+                echo=True  # Whether to echo the prompt
+            )
+
+
+
             # Tokenize the response for scoring with the PRM model
-            response_inputs = generator.tokenizer(response, return_tensors="pt").to(generator.device)
+            #response_inputs = generator.tokenizer(response, return_tensors="pt").to(generator.device)
 
             # Pass the response to the PRM model based on its input requirements
-            try:
+            #try:
                 # Example 1: If PRM model accepts BatchEncoding
-                prm_output = generator.prm_model(response_inputs)
+                # prm_output = generator.prm_model(response_inputs)
 
                 # Example 2: If PRM model expects only input_ids
                 # prm_output = generator.prm_model(response_inputs["input_ids"])
@@ -79,10 +90,10 @@ class BestOfN(GenerationStrategy):
                 # Example 3: If PRM model expects raw text
                 # prm_output = generator.prm_model(response)
 
-            except Exception as e:
-                print(f"Error with PRM model: {e}")
-                score = 0.0
-                continue
+            # except Exception as e:
+            #     print(f"Error with PRM model: {e}")
+            #     score = 0.0
+            #     continue
 
             # Calculate the score based on PRM output structure
             score = prm_output.logits.mean().item() if hasattr(prm_output, 'logits') else 0.0
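Note: the added block calls llm(...) in the llama-cpp-python completion style, but as committed the prompt is a plain string literal (so {system_message} and {response} are never substituted), llm is not defined anywhere in this hunk, and the unchanged scoring line falls back to 0.0 because a llama-cpp completion is a dict with no .logits attribute. A minimal sketch of how that call could be wired into the scoring step, assuming llm is a llama_cpp.Llama instance and that the PRM emits its score as plain text (both assumptions, not confirmed by this diff):

    from llama_cpp import Llama

    # Assumption: the PRM is a local GGUF checkpoint served through llama-cpp-python.
    llm = Llama(model_path="path/to/prm-model.gguf")  # hypothetical path

    def score_with_prm(system_message: str, response: str) -> float:
        # Interpolate the candidate response into the chat template (f-string,
        # unlike the literal "{system_message}" / "{response}" string in the diff).
        prompt = (
            f"<|system|>\n{system_message}</s>\n"
            f"<|user|>\n{response}</s>\n"
            f"<|assistant|>"
        )
        completion = llm(
            prompt,
            max_tokens=512,
            stop=["</s>"],  # stop token is model-specific; verify for the chosen PRM
            echo=False,     # the prompt text is not needed in the scored output
        )
        # llama-cpp returns a dict, not an object with .logits, so pull the text
        # out of the first choice and convert it to a number.
        text = completion["choices"][0]["text"].strip()
        try:
            return float(text)  # assumes the PRM emits a bare numeric score
        except ValueError:
            return 0.0          # mirrors the diff's default score on failure

The stop token and the float-parsing fallback are placeholders; both depend on the actual PRM checkpoint used with this strategy.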