dongyh20 committed
Commit 8a47087 · 1 Parent(s): 4ef079c

update space

Files changed (1): app.py (+4 -0)
app.py CHANGED

@@ -154,6 +154,8 @@ def extract_audio(videos_file_path):
 @spaces.GPU(duration=120)
 def ola_inference(multimodal, audio_path):
     visual, text = multimodal["files"][0], multimodal["text"]
+    if not visual:
+        return "ERROR: Image or Video is required.", None
     if visual.endswith("image2.png"):
         modality = "video"
         visual = f"{cur_dir}/case/case1.mp4"
@@ -216,6 +218,8 @@ def ola_inference(multimodal, audio_path):
     else:
         qs = ''
     if USE_SPEECH and audio_path:
+        if text:
+            return "ERROR: Please provide either text or audio question for image, not both.", None
         qs = DEFAULT_IMAGE_TOKEN + "\n" + "User's question in speech: " + DEFAULT_SPEECH_TOKEN + '\n'
     elif USE_SPEECH:
         qs = DEFAULT_SPEECH_TOKEN + DEFAULT_IMAGE_TOKEN + "\n" + qs
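
Both guards run before any model work, so malformed requests return an error string immediately instead of consuming the 120-second GPU allocation. Below is a minimal, self-contained sketch of the validation flow this commit introduces; run_ola is a hypothetical placeholder for the rest of the inference body, and the dict access is loosened with .get so the sketch runs standalone (the Space itself indexes multimodal["files"][0] directly):

    # Sketch of the two input guards added in this commit.
    # run_ola is a hypothetical stand-in, not a function in the Space.
    USE_SPEECH = True  # assumption: the Space runs with speech input enabled

    def ola_inference(multimodal, audio_path):
        files = multimodal.get("files") or []
        visual = files[0] if files else None  # tolerate an empty file list
        text = multimodal.get("text")

        # Guard 1 (first hunk): an image or video is mandatory.
        if not visual:
            return "ERROR: Image or Video is required.", None

        # Guard 2 (second hunk): when a spoken question is attached, a typed
        # question is rejected so the prompt source is unambiguous.
        if USE_SPEECH and audio_path and text:
            return ("ERROR: Please provide either text or audio question "
                    "for image, not both.", None)

        return run_ola(visual, text, audio_path)

    def run_ola(visual, text, audio_path):
        # Placeholder so the sketch is runnable on its own.
        return f"ok: visual={visual}", None

    # Usage:
    print(ola_inference({"files": [], "text": "What is shown?"}, None))
    # -> ('ERROR: Image or Video is required.', None)
    print(ola_inference({"files": ["cat.png"], "text": ""}, "question.wav"))
    # -> ('ok: visual=cat.png', None)

Returning the error message in place of the model output keeps the function's (output, audio) return shape intact, so the Gradio interface renders the error without any change to the output components.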