Spaces:

THUDM
/

CogVideoX-5B-Space

Running on L40S

App Files Files Community

multimodalart HF staff committed on Sep 6, 2024

Commit

b7be496

verified ·

1 Parent(s): d691db5

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -23

app.py CHANGED Viewed

@@ -55,36 +55,39 @@ Other times the user will not want modifications , but instead want a new image
 Video descriptions must have the same num of words as examples below. Extra words will be ignored.
 """
 def get_video_dimensions(input_video_path):
     reader = imageio_ffmpeg.read_frames(input_video_path)
     metadata = next(reader)
     return metadata['size']
 def center_crop_resize(input_video_path, target_width=720, target_height=480):
-    # Open the video file
     cap = cv2.VideoCapture(input_video_path)
-    # Get original video properties
     orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     orig_fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    # Calculate resize factor
     width_factor = target_width / orig_width
     height_factor = target_height / orig_height
     resize_factor = max(width_factor, height_factor)
-    # Calculate intermediate size
     inter_width = int(orig_width * resize_factor)
     inter_height = int(orig_height * resize_factor)
-    # Calculate frame skip
     target_fps = 8
     ideal_skip = max(0, math.ceil(orig_fps / target_fps) - 1)
     skip = min(5, ideal_skip)  # Cap at 5
-    # Adjust skip if not enough frames
     while (total_frames / (skip + 1)) < 49 and skip > 0:
         skip -= 1
@@ -98,10 +101,8 @@ def center_crop_resize(input_video_path, target_width=720, target_height=480):
             break
         if total_read % (skip + 1) == 0:
-            # Resize frame
             resized = cv2.resize(frame, (inter_width, inter_height), interpolation=cv2.INTER_AREA)
-            # Center crop
             start_x = (inter_width - target_width) // 2
             start_y = (inter_height - target_height) // 2
             cropped = resized[start_y:start_y+target_height, start_x:start_x+target_width]
@@ -113,7 +114,6 @@ def center_crop_resize(input_video_path, target_width=720, target_height=480):
     cap.release()
-    # Save the processed video to a temporary file
     with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
         temp_video_path = temp_file.name
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
@@ -188,13 +188,12 @@ def infer(
         seed = random.randint(0, 2 ** 8 - 1)
     if(video_input):
         video = load_video(video_input)[:49]  # Limit to 49 frames
-        video_pt = pipe(
             video=video,
             prompt=prompt,
             num_inference_steps=num_inference_steps,
             num_videos_per_prompt=1,
             strength=video_strenght,
-            num_frames=49,
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
@@ -241,7 +240,7 @@ def delete_old_files():
 threading.Thread(target=delete_old_files, daemon=True).start()
-examples = [["horse.mp4", "Pixel art of a horse running"]]
 with gr.Blocks() as demo:
     gr.Markdown("""
@@ -265,12 +264,11 @@ with gr.Blocks() as demo:
            """)
     with gr.Row():
-        with gr.Accordion("Video-to-video", open=False):
-            video_input = gr.Video(label="Input Video (will be cropped to 49 frames, 6 seconds at 8fps)")
-            strength = gr.Slider(0.1, 1.0, value=0.8, step=0.01, label="Strength")
-            examples_component = gr.Examples(examples, fn=process_video, inputs=[input_video, prompt], outputs=output_video, cache_examples="lazy")
-            examples_component.dataset._components = [input_video]
         with gr.Column():
             prompt = gr.Textbox(label="Prompt (Less than 200 Words)", placeholder="Enter your prompt here", lines=5)
             with gr.Row():
@@ -366,14 +364,18 @@ with gr.Blocks() as demo:
     def generate(prompt,
                  seed_value,
                  scale_status,
                  rife_status,
-                 progress=gr.Progress(track_tqdm=True)
                 ):
         latents, seed = infer(
             prompt,
             num_inference_steps=50,  # NOT Changed
             guidance_scale=7.0,  # NOT Changed
             seed=seed_value,
@@ -409,17 +411,17 @@ with gr.Blocks() as demo:
     generate_button.click(
         generate,
-        inputs=[prompt, seed_param, enable_scale, enable_rife],
         outputs=[video_output, download_video_button, download_gif_button, seed_text],
     )
     enhance_button.click(enhance_prompt_func, inputs=[prompt], outputs=[prompt])
-    input_video.upload(
         resize_if_unfit,
-        inputs=[input_video],
-        outputs=[input_video]
     )
 if __name__ == "__main__":
     demo.queue(max_size=15)
-    demo.launch()

 Video descriptions must have the same num of words as examples below. Extra words will be ignored.
 """
+def resize_if_unfit(input_video, progress=gr.Progress(track_tqdm=True)):
+    width, height = get_video_dimensions(input_video)
+    if width == 720 and height == 480:
+        processed_video = input_video
+    else:
+        processed_video = center_crop_resize(input_video)
+    return processed_video
 def get_video_dimensions(input_video_path):
     reader = imageio_ffmpeg.read_frames(input_video_path)
     metadata = next(reader)
     return metadata['size']
 def center_crop_resize(input_video_path, target_width=720, target_height=480):
     cap = cv2.VideoCapture(input_video_path)
     orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     orig_fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     width_factor = target_width / orig_width
     height_factor = target_height / orig_height
     resize_factor = max(width_factor, height_factor)
     inter_width = int(orig_width * resize_factor)
     inter_height = int(orig_height * resize_factor)
     target_fps = 8
     ideal_skip = max(0, math.ceil(orig_fps / target_fps) - 1)
     skip = min(5, ideal_skip)  # Cap at 5
     while (total_frames / (skip + 1)) < 49 and skip > 0:
         skip -= 1
             break
         if total_read % (skip + 1) == 0:
             resized = cv2.resize(frame, (inter_width, inter_height), interpolation=cv2.INTER_AREA)
             start_x = (inter_width - target_width) // 2
             start_y = (inter_height - target_height) // 2
             cropped = resized[start_y:start_y+target_height, start_x:start_x+target_width]
     cap.release()
     with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
         temp_video_path = temp_file.name
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         seed = random.randint(0, 2 ** 8 - 1)
     if(video_input):
         video = load_video(video_input)[:49]  # Limit to 49 frames
+        video_pt = pipe_video(
             video=video,
             prompt=prompt,
             num_inference_steps=num_inference_steps,
             num_videos_per_prompt=1,
             strength=video_strenght,
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
 threading.Thread(target=delete_old_files, daemon=True).start()
+examples = [["horse.mp4"], ["kitten.mp4"], ["train_running.mp4"]]
 with gr.Blocks() as demo:
     gr.Markdown("""
            """)
     with gr.Row():
         with gr.Column():
+            with gr.Accordion("Video-to-video", open=False):
+                video_input = gr.Video(label="Input Video (will be cropped to 49 frames, 6 seconds at 8fps)")
+                strength = gr.Slider(0.1, 1.0, value=0.8, step=0.01, label="Strength")
+                examples_component = gr.Examples(examples, inputs=[video_input], cache_examples=False)
             prompt = gr.Textbox(label="Prompt (Less than 200 Words)", placeholder="Enter your prompt here", lines=5)
             with gr.Row():
     def generate(prompt,
+                 video_input,
+                 video_strenght,
                  seed_value,
                  scale_status,
                  rife_status,
+                 #progress=gr.Progress(track_tqdm=True)
                 ):
         latents, seed = infer(
             prompt,
+            video_input,
+            video_strenght,
             num_inference_steps=50,  # NOT Changed
             guidance_scale=7.0,  # NOT Changed
             seed=seed_value,
     generate_button.click(
         generate,
+        inputs=[prompt, video_input, strength, seed_param, enable_scale, enable_rife],
         outputs=[video_output, download_video_button, download_gif_button, seed_text],
     )
     enhance_button.click(enhance_prompt_func, inputs=[prompt], outputs=[prompt])
+    video_input.upload(
         resize_if_unfit,
+        inputs=[video_input],
+        outputs=[video_input]
     )
 if __name__ == "__main__":
     demo.queue(max_size=15)
+    demo.launch()