Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import VideoMAEForVideoClassification,
|
|
|
3 |
import cv2 # OpenCV for video processing
|
4 |
|
5 |
# Model ID for video classification (UCF101 subset)
|
@@ -8,22 +9,25 @@ model_id = "MCG-NJU/videomae-base"
|
|
8 |
def analyze_video(video):
|
9 |
# Extract key frames from the video using OpenCV
|
10 |
frames = extract_key_frames(video)
|
11 |
-
|
12 |
# Load model and feature extractor manually
|
13 |
model = VideoMAEForVideoClassification.from_pretrained(model_id)
|
14 |
-
|
|
|
|
|
|
|
15 |
|
16 |
-
#
|
17 |
-
|
|
|
18 |
|
19 |
-
|
|
|
|
|
|
|
20 |
results = []
|
21 |
-
for
|
22 |
-
|
23 |
-
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
24 |
-
predictions = classifier([frame_rgb]) # Assuming model outputs probabilities
|
25 |
-
# Analyze predictions for insights related to the play
|
26 |
-
result = analyze_predictions_ucf101(predictions)
|
27 |
results.append(result)
|
28 |
|
29 |
# Aggregate results across frames and provide a final analysis
|
@@ -40,24 +44,29 @@ def extract_key_frames(video):
|
|
40 |
for i in range(frame_count):
|
41 |
ret, frame = cap.read()
|
42 |
if ret and i % (fps // 2) == 0: # Extract a frame every half second
|
43 |
-
frames.append(frame)
|
44 |
|
45 |
cap.release()
|
46 |
return frames
|
47 |
|
48 |
-
def analyze_predictions_ucf101(
|
49 |
-
#
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
relevant_actions = ["running", "sliding", "jumping"]
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
else:
|
62 |
return "inconclusive"
|
63 |
|
@@ -80,6 +89,7 @@ interface = gr.Interface(
|
|
80 |
outputs="text",
|
81 |
title="Baseball Play Analysis (UCF101 Subset Exploration)",
|
82 |
description="Upload a video of a baseball play (safe/out at a base). This app explores using a video classification model (UCF101 subset) for analysis. Note: The model might not be specifically trained for baseball plays.",
|
|
|
83 |
)
|
84 |
|
85 |
interface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor
|
3 |
+
import torch
|
4 |
import cv2 # OpenCV for video processing
|
5 |
|
6 |
# Model ID for video classification (UCF101 subset)
|
|
|
9 |
def analyze_video(video):
|
10 |
# Extract key frames from the video using OpenCV
|
11 |
frames = extract_key_frames(video)
|
12 |
+
|
13 |
# Load model and feature extractor manually
|
14 |
model = VideoMAEForVideoClassification.from_pretrained(model_id)
|
15 |
+
processor = VideoMAEImageProcessor.from_pretrained(model_id)
|
16 |
+
|
17 |
+
# Prepare frames for the model
|
18 |
+
inputs = processor(images=frames, return_tensors="pt")
|
19 |
|
20 |
+
# Make predictions
|
21 |
+
with torch.no_grad():
|
22 |
+
outputs = model(**inputs)
|
23 |
|
24 |
+
logits = outputs.logits
|
25 |
+
predictions = torch.argmax(logits, dim=-1)
|
26 |
+
|
27 |
+
# Analyze predictions for insights related to the play
|
28 |
results = []
|
29 |
+
for prediction in predictions:
|
30 |
+
result = analyze_predictions_ucf101(prediction.item())
|
|
|
|
|
|
|
|
|
31 |
results.append(result)
|
32 |
|
33 |
# Aggregate results across frames and provide a final analysis
|
|
|
44 |
for i in range(frame_count):
|
45 |
ret, frame = cap.read()
|
46 |
if ret and i % (fps // 2) == 0: # Extract a frame every half second
|
47 |
+
frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) # Convert to RGB
|
48 |
|
49 |
cap.release()
|
50 |
return frames
|
51 |
|
52 |
+
def analyze_predictions_ucf101(prediction):
    """Translate a raw class index into a baseball-play verdict.

    The index-to-action mapping below is hypothetical (placeholder ids,
    not the real UCF101 label space). Any index outside the mapping, and
    any action without a specific verdict, yields "inconclusive".

    Args:
        prediction: Integer class index produced by the classifier.

    Returns:
        One of "potentially safe", "potentially out", or "inconclusive".
    """
    # Hypothetical id -> action-name table; extend as more labels are mapped.
    action_labels = {
        0: "running",
        1: "sliding",
        2: "jumping",
        # Add more labels as necessary
    }
    action = action_labels.get(prediction, "unknown")

    # Verdict per action. "jumping" (and any unmapped/unknown action) has
    # no specific verdict, so the lookup falls through to "inconclusive".
    verdicts = {
        "sliding": "potentially safe",
        "running": "potentially out",
    }
    return verdicts.get(action, "inconclusive")
|
72 |
|
|
|
89 |
outputs="text",
|
90 |
title="Baseball Play Analysis (UCF101 Subset Exploration)",
|
91 |
description="Upload a video of a baseball play (safe/out at a base). This app explores using a video classification model (UCF101 subset) for analysis. Note: The model might not be specifically trained for baseball plays.",
|
92 |
+
share=True
|
93 |
)
|
94 |
|
95 |
interface.launch()
|