Chris4K committed on
Commit
ae60821
·
verified ·
1 Parent(s): 3a7547b

Update services/strategy.py

Browse files
Files changed (1) hide show
  1. services/strategy.py +18 -8
services/strategy.py CHANGED
@@ -68,14 +68,23 @@ class BestOfN(GenerationStrategy):
68
  # Tokenize the response for scoring with the PRM model
69
  response_inputs = generator.tokenizer(response, return_tensors="pt").to(generator.device)
70
 
71
- # Extract the necessary inputs for prm_model
72
- prm_input_ids = response_inputs["input_ids"] # Always present
73
- attention_mask = response_inputs["attention_mask"] # Optional, depending on your model
74
-
75
- # Pass only the required tensors to prm_model
76
- prm_output = generator.prm_model(input_ids=prm_input_ids, attention_mask=attention_mask)
77
-
78
- # Check the expected output structure for prm_model and use it accordingly
 
 
 
 
 
 
 
 
 
79
  score = prm_output.logits.mean().item() if hasattr(prm_output, 'logits') else 0.0
80
 
81
  # Append the response and its score
@@ -86,6 +95,7 @@ class BestOfN(GenerationStrategy):
86
 
87
 
88
 
 
89
  class BeamSearch(GenerationStrategy):
90
  def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
91
  input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)
 
68
  # Tokenize the response for scoring with the PRM model
69
  response_inputs = generator.tokenizer(response, return_tensors="pt").to(generator.device)
70
 
71
+ # Pass the response to the PRM model based on its input requirements
72
+ try:
73
+ # Example 1: If PRM model accepts BatchEncoding
74
+ prm_output = generator.prm_model(response_inputs)
75
+
76
+ # Example 2: If PRM model expects only input_ids
77
+ # prm_output = generator.prm_model(response_inputs["input_ids"])
78
+
79
+ # Example 3: If PRM model expects raw text
80
+ # prm_output = generator.prm_model(response)
81
+
82
+ except Exception as e:
83
+ print(f"Error with PRM model: {e}")
84
+ score = 0.0
85
+ continue
86
+
87
+ # Calculate the score based on PRM output structure
88
  score = prm_output.logits.mean().item() if hasattr(prm_output, 'logits') else 0.0
89
 
90
  # Append the response and its score
 
95
 
96
 
97
 
98
+
99
  class BeamSearch(GenerationStrategy):
100
  def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
101
  input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)