"""Smoke test for ``EndpointHandler``: one video request and one image request.

The script builds the handler, sends it two sample payloads and prints
whatever the handler returns so the result can be compared by eye with the
expected-output examples at the bottom of the file.
"""

from handler import EndpointHandler

# Initialize handler.
my_handler = EndpointHandler(path="DAMO-NLP-SG/VideoLLaMA2-8x7B")

# Prepare sample payloads. Each payload carries parallel lists: the media
# path, the question asked about it, and the modality of that media.
video_payload = {
    "paths": ["assets/cat_and_chicken.mp4"],
    "questions": [
        "What animals are in the video, what are they doing, and how does the video feel?"
    ],
    "modal_list": ["video"],
}

image_payload = {
    "paths": ["assets/sora.png"],
    "questions": [
        "What is the woman wearing, what is she doing, and how does the image feel?"
    ],
    "modal_list": ["image"],
}

# Test the handler.
video_pred = my_handler(video_payload)
image_pred = my_handler(image_payload)

# Show results.
print("video_pred", video_pred)
print("image_pred", image_pred)

# Expected output examples (exact wording may vary between runs):
#
# video_pred [{'output': 'The video features a kitten and a baby chick
#   playing together. The kitten is seen laying on the floor while the baby
#   chick hops around. The two animals interact playfully with each other,
#   and the video has a cute and heartwarming feel to it.'}]
#
# image_pred [{'output': 'The woman in the image is wearing a black coat
#   and sunglasses, and she is walking down a rain-soaked city street. The
#   image feels vibrant and lively, with the bright city lights reflecting
#   off the wet pavement, creating a visually appealing atmosphere.'}]