dongyh20 committed
Commit 8a47087 · 1 Parent(s): 4ef079c

update space

Files changed (1): app.py (+4 -0)
app.py CHANGED

@@ -154,6 +154,8 @@ def extract_audio(videos_file_path):
 @spaces.GPU(duration=120)
 def ola_inference(multimodal, audio_path):
     visual, text = multimodal["files"][0], multimodal["text"]
+    if not visual:
+        return "ERROR: Image or Video is required.", None
     if visual.endswith("image2.png"):
         modality = "video"
         visual = f"{cur_dir}/case/case1.mp4"
@@ -216,6 +218,8 @@ def ola_inference(multimodal, audio_path):
     else:
         qs = ''
     if USE_SPEECH and audio_path:
+        if text:
+            return "ERROR: Please provide either text or audio question for image, not both.", None
         qs = DEFAULT_IMAGE_TOKEN + "\n" + "User's question in speech: " + DEFAULT_SPEECH_TOKEN + '\n'
     elif USE_SPEECH:
         qs = DEFAULT_SPEECH_TOKEN + DEFAULT_IMAGE_TOKEN + "\n" + qs
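
Both guards run before any model work, so malformed requests return an error string immediately instead of consuming the 120-second GPU allocation. Below is a minimal, self-contained sketch of the validation flow this commit introduces; run_ola is a hypothetical placeholder for the rest of the inference body, and the dict access is loosened with .get so the sketch runs standalone (the Space itself indexes multimodal["files"][0] directly):

    # Sketch of the two input guards added in this commit.
    # run_ola is a hypothetical stand-in, not a function in the Space.
    USE_SPEECH = True  # assumption: the Space runs with speech input enabled

    def ola_inference(multimodal, audio_path):
        files = multimodal.get("files") or []
        visual = files[0] if files else None  # tolerate an empty file list
        text = multimodal.get("text")

        # Guard 1 (first hunk): an image or video is mandatory.
        if not visual:
            return "ERROR: Image or Video is required.", None

        # Guard 2 (second hunk): when a spoken question is attached, a typed
        # question is rejected so the prompt source is unambiguous.
        if USE_SPEECH and audio_path and text:
            return ("ERROR: Please provide either text or audio question "
                    "for image, not both.", None)

        return run_ola(visual, text, audio_path)

    def run_ola(visual, text, audio_path):
        # Placeholder so the sketch is runnable on its own.
        return f"ok: visual={visual}", None

    # Usage:
    print(ola_inference({"files": [], "text": "What is shown?"}, None))
    # -> ('ERROR: Image or Video is required.', None)
    print(ola_inference({"files": ["cat.png"], "text": ""}, "question.wav"))
    # -> ('ok: visual=cat.png', None)

Returning the error message in place of the model output keeps the function's (output, audio) return shape intact, so the Gradio interface renders the error without any change to the output components.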