ssalb committed
Commit 16746e5 · 1 Parent(s): 9db3ef0

Update space with latest code and dependencies on Sat Jan 4 09:51:32 UTC 2025

Files changed (1)
  1. story_beam_search/scoring.py +7 -4
story_beam_search/scoring.py CHANGED
@@ -137,10 +137,8 @@ class FluencyScorer(StoryScorer):
         # For each story in the batch
         for j in range(len(batch_stories)):
             story_scores = []
-            input_ids = batch_inputs.input_ids[j : j + 1]  # Keep batch dimension
-            attention_mask = batch_inputs.attention_mask[
-                j : j + 1
-            ]  # Get attention mask
+            input_ids = batch_inputs.input_ids[j : j + 1]
+            attention_mask = batch_inputs.attention_mask[j : j + 1]
 
             # Only process tokens that aren't padding
             valid_tokens = attention_mask[0].sum().item()
@@ -150,6 +148,11 @@ class FluencyScorer(StoryScorer):
                 masked_input_ids = input_ids.clone()
                 masked_input_ids[0, k] = mask_token_id
 
+                # Ensure token is within vocab range
+                masked_input_ids = masked_input_ids.clamp(
+                    0, self.tokenizer.vocab_size - 1
+                )
+
                 with torch.no_grad():
                     outputs = self.model(
                         input_ids=masked_input_ids, attention_mask=attention_mask
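
In short, the commit collapses the per-story slicing of input_ids and attention_mask onto single lines and adds a clamp so every token id fed to the masked LM stays inside the tokenizer's vocabulary. For context, below is a minimal, self-contained sketch of the masked-token (pseudo-log-likelihood) fluency scoring pattern this method appears to implement; the model choice, function name, and averaging step are illustrative assumptions, not code from this repository.

import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Illustrative model choice; the Space may use a different masked LM.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")
model.eval()

def pseudo_log_likelihood(text: str) -> float:
    """Average log-probability of each token when it is masked out."""
    enc = tokenizer(text, return_tensors="pt")
    input_ids = enc.input_ids              # shape (1, seq_len)
    attention_mask = enc.attention_mask
    scores = []
    # Skip the [CLS]/[SEP] special tokens at the ends
    for k in range(1, input_ids.shape[1] - 1):
        masked = input_ids.clone()
        masked[0, k] = tokenizer.mask_token_id
        # Same guard the commit adds: keep ids inside the MLM head's range
        # (added special tokens can carry ids >= tokenizer.vocab_size)
        masked = masked.clamp(0, tokenizer.vocab_size - 1)
        with torch.no_grad():
            logits = model(input_ids=masked, attention_mask=attention_mask).logits
        log_probs = logits[0, k].log_softmax(dim=-1)
        scores.append(log_probs[input_ids[0, k]].item())
    return sum(scores) / max(len(scores), 1)

print(pseudo_log_likelihood("The cat sat on the mat."))

The clamp reads as a defensive guard: tokenizers that carry added special tokens can emit ids at or above tokenizer.vocab_size, and passing those to a model whose embedding table was sized to the base vocabulary raises an index error, so clamping trades an occasional slightly-wrong token for a crash-free forward pass.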