rodrigomasini committed
Commit 557f7d8
1 Parent(s): 9fc5e2e

Update app_v4.py

Files changed (1):
  1. app_v4.py  +9 -11
app_v4.py CHANGED
@@ -67,7 +67,7 @@ if model_loaded:
     with col1:
         user_input = st.text_input("Input a phrase")
     with col2:
-        max_token = st.number_input(label="Select max number of generated tokens", min_value=1, max_value=1024, value=50, step=5)
+        max_token = st.number_input(label="Select max number of generated tokens", min_value=1, max_value=512, value=50, step=5)
 
     # Generate button
     if st.button("Generate the prompt"):
@@ -75,17 +75,15 @@ if model_loaded:
             prompt_template = f'USER: {user_input}\nASSISTANT:'
             inputs = tokenizer(prompt_template, return_tensors='pt', max_length=512, truncation=True, padding='max_length')
             inputs = inputs.to(device)  # Move inputs to the same device as model
-
-            # Generate text using torch.inference_mode for better performance during inference
+            # Generate text using torch.inference_mode for better performance during inference
             with torch.inference_mode():
-                output = model.generate(**inputs, max_new_tokens=max_token, num_return_sequences=2)
-
-            # Display generated texts
-            for i in range(2):  # Loop through the number of return sequences
-                output_ids_cut = output[i, inputs["input_ids"].shape[1]:]
-                generated_text = tokenizer.decode(output_ids_cut, skip_special_tokens=True)
-                st.markdown(f"**Generated Text {i+1}:**\n{generated_text}")
-
+                output = model.generate(**inputs, max_new_tokens=max_token)
+
+            # Cut the tokens at the input length to display only the generated text
+            output_ids_cut = output[:, inputs["input_ids"].shape[1]:]
+            generated_text = tokenizer.decode(output_ids_cut[0], skip_special_tokens=True)
+
+            st.markdown(f"**Generated Text:**\n{generated_text}")
         except RuntimeError as e:
             if 'CUDA out of memory' in str(e):
                 st.error("CUDA out of memory during generation. Try reducing the input length or restarting the app.")