ManishThota committed
Commit b54618b · verified · 1 Parent(s): eba62a3

Update app.py

Files changed (1)
  1. app.py +39 -22
app.py CHANGED
@@ -2,26 +2,15 @@ from transformers import BitsAndBytesConfig, LlavaNextVideoForConditionalGenerat
  import torch
  import numpy as np
  import av
+ import gc
  import spaces
  import gradio as gr
  import os
  import json
+ import csv
+ import io

- # Model Configuration
- quantization_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_compute_dtype=torch.float16
- )
-
- model_name = 'llava-hf/LLaVA-NeXT-Video-7B-DPO-hf'
-
- # Load Model and Processor
- processor = LlavaNextVideoProcessor.from_pretrained(model_name)
- model = LlavaNextVideoForConditionalGeneration.from_pretrained(
-     model_name,
-     quantization_config=quantization_config,
-     device_map='auto'
- )
+ # ... (Model loading code remains the same)

  @spaces.GPU
  def read_video_pyav(container, indices):
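
Note: the comment added in this hunk says the model-loading code "remains the same", but the hunk deletes it outright; after this commit nothing in app.py defines `processor` or `model`, so `process_video` will raise a `NameError` on its first call. If the intent was to keep loading the model at import time, restoring the deleted block would look like the sketch below (reassembled from the removed lines; the 4-bit `BitsAndBytesConfig` is what keeps the 7B checkpoint within a single-GPU memory budget):

# Reassembled from the lines removed above: 4-bit quantization plus
# device_map='auto' so accelerate places layers on the available GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

model_name = 'llava-hf/LLaVA-NeXT-Video-7B-DPO-hf'

processor = LlavaNextVideoProcessor.from_pretrained(model_name)
model = LlavaNextVideoForConditionalGeneration.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map='auto'
)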
@@ -64,12 +53,10 @@ def process_video(video_file, question):
      output = model.generate(**input, **generate_kwargs)
      generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
      return generated_text.split("ASSISTANT: ", 1)[-1].strip()
-
  @spaces.GPU
  def analyze_videos(video_files, selected_questions):
-     """Analyzes all videos with the selected questions."""
+     """Analyzes videos, saves results to CSV, and returns CSV data and JSON."""
      all_results = {}
-
      questions = {
          "hands_free": "Examine the subject’s right and left hands in the video to check if they are holding anything like a microphone, book, paper(White color), object, or any electronic device, try segmentations and decide if the hands are free or not.",
          "standing/sitting": "Evaluate the subject’s body posture and movement within the video. Are they standing upright with both feet planted firmly on the ground? If so, they are standing. If they seem to be seated, they are seated.",
@@ -82,11 +69,34 @@ def analyze_videos(video_files, selected_questions):
          all_results[video_name] = {}
          for question_key in selected_questions:
              answer = process_video(video_file, questions[question_key])
-             # Simple True/False determination (You might want to refine this)
              all_results[video_name][question_key] = "true" if "yes" in answer.lower() else "false"
-     return json.dumps(all_results, indent=4)

- # Define Gradio interface
+             del answer
+             gc.collect()
+             torch.cuda.empty_cache()
+
+     # Create CSV content
+     csv_output = io.StringIO()
+     writer = csv.writer(csv_output)
+     header = ["Video File"] + list(questions.keys())
+     writer.writerow(header)
+     for video_name, results in all_results.items():
+         row = [video_name] + [results.get(key, "") for key in questions]
+         writer.writerow(row)
+     csv_content = csv_output.getvalue()
+
+     # Return both JSON and CSV
+     json_output = json.dumps(all_results, indent=4)
+     return json_output, csv_content
+
+ def download_csv(csv_content):
+     """Creates a downloadable CSV file."""
+     return gr.File.update(
+         value=csv_content,
+         filename="video_analysis.csv",
+     )
+
+ # Define Gradio interface
  iface = gr.Interface(
      fn=analyze_videos,
      inputs=[
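
Note: as the removed comment itself admitted, the true/false mapping is crude. `"yes" in answer.lower()` is a substring test, so it also fires on words like "eyes", and an affirmative answer that never contains the literal word "yes" is recorded as "false". A slightly sturdier variant, sketched below, matches a standalone yes/no token instead (`to_bool` is a hypothetical helper, not part of this commit):

import re

def to_bool(answer: str) -> str:
    # Take the first standalone "yes"/"no" token rather than a substring,
    # so "eyes" no longer counts as an affirmative answer.
    tokens = re.findall(r"\b(yes|no)\b", answer.lower())
    return "true" if tokens and tokens[0] == "yes" else "false"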
@@ -94,10 +104,17 @@ iface = gr.Interface(
          gr.CheckboxGroup(["hands_free", "standing/sitting", "interaction_with_background", "indoors/outdoors"],
                           label="Select Questions to Apply")
      ],
-     outputs=gr.JSON(label="Analysis Results"),
+     outputs=[
+         gr.JSON(label="Analysis Results (JSON)"),
+         gr.Textbox(label="CSV Results", lines=15),
+         gr.Button("Download CSV")
+     ],
      title="Video Analysis",
      description="Upload videos and select questions to analyze."
  )

+ iface.download_button = iface.outputs[2]  # Assign the third output as the download button
+ iface.outputs[2].click(download_csv, inputs=[iface.outputs[1]], outputs=[iface.download_button])
+
  if __name__ == "__main__":
      iface.launch(debug=True)
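
Note: the download wiring in this last hunk is unlikely to work on any recent Gradio release: `gr.Interface` does not accept a `gr.Button` as an output component, `iface.outputs[2].click(...)` does not attach an event after the Interface has been built, and `gr.File.update()` has no `filename` parameter. The conventional pattern is a `gr.Blocks` layout that returns a file path to a `gr.File` component, with the button's `click` event wired explicitly. A minimal sketch, assuming the `analyze_videos` function and question keys from this commit and writing the CSV text to a temporary file:

import tempfile
import gradio as gr

def csv_to_file(csv_content):
    # gr.File renders a download link from a path, so persist the CSV text first.
    tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False,
                                      prefix="video_analysis_")
    tmp.write(csv_content)
    tmp.close()
    return tmp.name

with gr.Blocks(title="Video Analysis") as demo:
    videos = gr.File(file_count="multiple", label="Upload Videos")
    selected = gr.CheckboxGroup(
        ["hands_free", "standing/sitting", "interaction_with_background", "indoors/outdoors"],
        label="Select Questions to Apply",
    )
    run_btn = gr.Button("Analyze")
    json_out = gr.JSON(label="Analysis Results (JSON)")
    csv_out = gr.Textbox(label="CSV Results", lines=15)
    dl_btn = gr.Button("Download CSV")
    dl_file = gr.File(label="video_analysis.csv")

    run_btn.click(analyze_videos, inputs=[videos, selected], outputs=[json_out, csv_out])
    dl_btn.click(csv_to_file, inputs=[csv_out], outputs=[dl_file])

if __name__ == "__main__":
    demo.launch(debug=True)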