Spaces:

ManishThota
/

Super-Rapid-Annotator

Running on Zero

App Files Files Community

ManishThota committed on Jul 30, 2024

Commit

b54618b

verified ·

1 Parent(s): eba62a3

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -22

app.py CHANGED Viewed

@@ -2,26 +2,15 @@ from transformers import BitsAndBytesConfig, LlavaNextVideoForConditionalGenerat
 import torch
 import numpy as np
 import av
 import spaces
 import gradio as gr
 import os
 import json
-# Model Configuration
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16
-)
-model_name = 'llava-hf/LLaVA-NeXT-Video-7B-DPO-hf'
-# Load Model and Processor
-processor = LlavaNextVideoProcessor.from_pretrained(model_name)
-model = LlavaNextVideoForConditionalGeneration.from_pretrained(
-    model_name,
-    quantization_config=quantization_config,
-    device_map='auto'
-)
 @spaces.GPU
 def read_video_pyav(container, indices):
@@ -64,12 +53,10 @@ def process_video(video_file, question):
     output = model.generate(**input, **generate_kwargs)
     generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
     return generated_text.split("ASSISTANT: ", 1)[-1].strip()
 @spaces.GPU
 def analyze_videos(video_files, selected_questions):
-    """Analyzes all videos with the selected questions."""
     all_results = {}
     questions = {
         "hands_free": "Examine the subject’s right and left hands in the video to check if they are holding anything like a microphone, book, paper(White color), object, or any electronic device, try segmentations and decide if the hands are free or not.",
         "standing/sitting": "Evaluate the subject’s body posture and movement within the video. Are they standing upright with both feet planted firmly on the ground? If so, they are standing. If they seem to be seated, they are seated.",
@@ -82,11 +69,34 @@ def analyze_videos(video_files, selected_questions):
         all_results[video_name] = {}
         for question_key in selected_questions:
             answer = process_video(video_file, questions[question_key])
-            # Simple True/False determination (You might want to refine this)
             all_results[video_name][question_key] = "true" if "yes" in answer.lower() else "false"
-    return json.dumps(all_results, indent=4)
-# Define Gradio interface
 iface = gr.Interface(
     fn=analyze_videos,
     inputs=[
@@ -94,10 +104,17 @@ iface = gr.Interface(
         gr.CheckboxGroup(["hands_free", "standing/sitting", "interaction_with_background", "indoors/outdoors"],
                         label="Select Questions to Apply")
     ],
-    outputs=gr.JSON(label="Analysis Results"),
     title="Video Analysis",
     description="Upload videos and select questions to analyze."
 )
 if __name__ == "__main__":
     iface.launch(debug=True)

 import torch
 import numpy as np
 import av
+import gc
 import spaces
 import gradio as gr
 import os
 import json
+import csv
+import io
+# ... (Model loading code remains the same)
 @spaces.GPU
 def read_video_pyav(container, indices):
     output = model.generate(**input, **generate_kwargs)
     generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
     return generated_text.split("ASSISTANT: ", 1)[-1].strip()
 @spaces.GPU
 def analyze_videos(video_files, selected_questions):
+    """Analyzes videos, saves results to CSV, and returns CSV data and JSON."""
     all_results = {}
     questions = {
         "hands_free": "Examine the subject’s right and left hands in the video to check if they are holding anything like a microphone, book, paper(White color), object, or any electronic device, try segmentations and decide if the hands are free or not.",
         "standing/sitting": "Evaluate the subject’s body posture and movement within the video. Are they standing upright with both feet planted firmly on the ground? If so, they are standing. If they seem to be seated, they are seated.",
         all_results[video_name] = {}
         for question_key in selected_questions:
             answer = process_video(video_file, questions[question_key])
             all_results[video_name][question_key] = "true" if "yes" in answer.lower() else "false"
+        del answer
+        gc.collect()
+        torch.cuda.empty_cache()
+    # Create CSV content
+    csv_output = io.StringIO()
+    writer = csv.writer(csv_output)
+    header = ["Video File"] + list(questions.keys())
+    writer.writerow(header)
+    for video_name, results in all_results.items():
+        row = [video_name] + [results.get(key, "") for key in questions]
+        writer.writerow(row)
+    csv_content = csv_output.getvalue()
+    # Return both JSON and CSV
+    json_output = json.dumps(all_results, indent=4)
+    return json_output, csv_content
+def download_csv(csv_content):
+    """Creates a downloadable CSV file."""
+    return gr.File.update(
+        value=csv_content,
+        filename="video_analysis.csv",
+    )
+# Define Gradio interface
 iface = gr.Interface(
     fn=analyze_videos,
     inputs=[
         gr.CheckboxGroup(["hands_free", "standing/sitting", "interaction_with_background", "indoors/outdoors"],
                         label="Select Questions to Apply")
     ],
+    outputs=[
+        gr.JSON(label="Analysis Results (JSON)"),
+        gr.Textbox(label="CSV Results", lines=15),
+        gr.Button("Download CSV")
+    ],
     title="Video Analysis",
     description="Upload videos and select questions to analyze."
 )
+iface.download_button = iface.outputs[2]  # Alias the third output (declared as the "Download CSV" button) on the interface object
+iface.outputs[2].click(download_csv, inputs=[iface.outputs[1]], outputs=[iface.download_button])  # NOTE(review): .click() listeners are normally registered inside a gr.Blocks context, and this routes download_csv's result back into the button itself — confirm this wiring works
 if __name__ == "__main__":
     iface.launch(debug=True)