Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -2,26 +2,15 @@ from transformers import BitsAndBytesConfig, LlavaNextVideoForConditionalGenerat
|
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
import av
|
|
|
5 |
import spaces
|
6 |
import gradio as gr
|
7 |
import os
|
8 |
import json
|
|
|
|
|
9 |
|
10 |
-
# Model
|
11 |
-
quantization_config = BitsAndBytesConfig(
|
12 |
-
load_in_4bit=True,
|
13 |
-
bnb_4bit_compute_dtype=torch.float16
|
14 |
-
)
|
15 |
-
|
16 |
-
model_name = 'llava-hf/LLaVA-NeXT-Video-7B-DPO-hf'
|
17 |
-
|
18 |
-
# Load Model and Processor
|
19 |
-
processor = LlavaNextVideoProcessor.from_pretrained(model_name)
|
20 |
-
model = LlavaNextVideoForConditionalGeneration.from_pretrained(
|
21 |
-
model_name,
|
22 |
-
quantization_config=quantization_config,
|
23 |
-
device_map='auto'
|
24 |
-
)
|
25 |
|
26 |
@spaces.GPU
|
27 |
def read_video_pyav(container, indices):
|
@@ -64,12 +53,10 @@ def process_video(video_file, question):
|
|
64 |
output = model.generate(**input, **generate_kwargs)
|
65 |
generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
|
66 |
return generated_text.split("ASSISTANT: ", 1)[-1].strip()
|
67 |
-
|
68 |
@spaces.GPU
|
69 |
def analyze_videos(video_files, selected_questions):
|
70 |
-
"""Analyzes
|
71 |
all_results = {}
|
72 |
-
|
73 |
questions = {
|
74 |
"hands_free": "Examine the subject’s right and left hands in the video to check if they are holding anything like a microphone, book, paper(White color), object, or any electronic device, try segmentations and decide if the hands are free or not.",
|
75 |
"standing/sitting": "Evaluate the subject’s body posture and movement within the video. Are they standing upright with both feet planted firmly on the ground? If so, they are standing. If they seem to be seated, they are seated.",
|
@@ -82,11 +69,34 @@ def analyze_videos(video_files, selected_questions):
|
|
82 |
all_results[video_name] = {}
|
83 |
for question_key in selected_questions:
|
84 |
answer = process_video(video_file, questions[question_key])
|
85 |
-
# Simple True/False determination (You might want to refine this)
|
86 |
all_results[video_name][question_key] = "true" if "yes" in answer.lower() else "false"
|
87 |
-
return json.dumps(all_results, indent=4)
|
88 |
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
iface = gr.Interface(
|
91 |
fn=analyze_videos,
|
92 |
inputs=[
|
@@ -94,10 +104,17 @@ iface = gr.Interface(
|
|
94 |
gr.CheckboxGroup(["hands_free", "standing/sitting", "interaction_with_background", "indoors/outdoors"],
|
95 |
label="Select Questions to Apply")
|
96 |
],
|
97 |
-
outputs=
|
|
|
|
|
|
|
|
|
98 |
title="Video Analysis",
|
99 |
description="Upload videos and select questions to analyze."
|
100 |
)
|
101 |
|
|
|
|
|
|
|
102 |
if __name__ == "__main__":
|
103 |
iface.launch(debug=True)
|
|
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
import av
|
5 |
+
import gc
|
6 |
import spaces
|
7 |
import gradio as gr
|
8 |
import os
|
9 |
import json
|
10 |
+
import csv
|
11 |
+
import io
|
12 |
|
13 |
+
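# NOTE: the model and processor loading code must be kept here (it is not shown in this diff); process_video below uses the module-level `model` and `processor`.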
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
@spaces.GPU
|
16 |
def read_video_pyav(container, indices):
|
|
|
53 |
output = model.generate(**input, **generate_kwargs)
|
54 |
generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
|
55 |
return generated_text.split("ASSISTANT: ", 1)[-1].strip()
|
|
|
56 |
@spaces.GPU
|
57 |
def analyze_videos(video_files, selected_questions):
|
58 |
+
"""Analyzes videos, saves results to CSV, and returns CSV data and JSON."""
|
59 |
all_results = {}
|
|
|
60 |
questions = {
|
61 |
"hands_free": "Examine the subject’s right and left hands in the video to check if they are holding anything like a microphone, book, paper(White color), object, or any electronic device, try segmentations and decide if the hands are free or not.",
|
62 |
"standing/sitting": "Evaluate the subject’s body posture and movement within the video. Are they standing upright with both feet planted firmly on the ground? If so, they are standing. If they seem to be seated, they are seated.",
|
|
|
69 |
all_results[video_name] = {}
|
70 |
for question_key in selected_questions:
|
71 |
answer = process_video(video_file, questions[question_key])
|
|
|
72 |
all_results[video_name][question_key] = "true" if "yes" in answer.lower() else "false"
|
|
|
73 |
|
74 |
+
del answer
|
75 |
+
gc.collect()
|
76 |
+
torch.cuda.empty_cache()
|
77 |
+
|
78 |
+
# Create CSV content
|
79 |
+
csv_output = io.StringIO()
|
80 |
+
writer = csv.writer(csv_output)
|
81 |
+
header = ["Video File"] + list(questions.keys())
|
82 |
+
writer.writerow(header)
|
83 |
+
for video_name, results in all_results.items():
|
84 |
+
row = [video_name] + [results.get(key, "") for key in questions]
|
85 |
+
writer.writerow(row)
|
86 |
+
csv_content = csv_output.getvalue()
|
87 |
+
|
88 |
+
# Return both JSON and CSV
|
89 |
+
json_output = json.dumps(all_results, indent=4)
|
90 |
+
return json_output, csv_content
|
91 |
+
|
92 |
+
def download_csv(csv_content):
    """Write CSV text to a temporary file and return its path for download.

    Args:
        csv_content: The CSV document as a single string. ``None`` or an
            empty string produces an empty file.

    Returns:
        The filesystem path of a freshly written ``video_analysis.csv``.
    """
    import os
    import tempfile

    # A file path is returned instead of the raw text: file-style outputs
    # are presumed to expect a path on disk as their value -- confirm
    # against the Gradio version in use.
    target_dir = tempfile.mkdtemp()
    csv_path = os.path.join(target_dir, "video_analysis.csv")

    # newline="" keeps the line endings already present in the CSV text
    # instead of letting the platform translate them a second time.
    with open(csv_path, "w", newline="", encoding="utf-8") as handle:
        handle.write(csv_content or "")

    return csv_path
|
98 |
+
|
99 |
+
# Define Gradio interface
|
100 |
iface = gr.Interface(
|
101 |
fn=analyze_videos,
|
102 |
inputs=[
|
|
|
104 |
gr.CheckboxGroup(["hands_free", "standing/sitting", "interaction_with_background", "indoors/outdoors"],
|
105 |
label="Select Questions to Apply")
|
106 |
],
|
107 |
+
outputs=[
|
108 |
+
gr.JSON(label="Analysis Results (JSON)"),
|
109 |
+
gr.Textbox(label="CSV Results", lines=15),
|
110 |
+
gr.Button("Download CSV")
|
111 |
+
],
|
112 |
title="Video Analysis",
|
113 |
description="Upload videos and select questions to analyze."
|
114 |
)
|
115 |
|
116 |
+
iface.download_button = iface.outputs[2] # Assign the third output as the download button
|
117 |
+
iface.outputs[2].click(download_csv, inputs=[iface.outputs[1]], outputs=[iface.download_button])
|
118 |
+
|
119 |
if __name__ == "__main__":
|
120 |
iface.launch(debug=True)
|