from handler import EndpointHandler

# Sample request payloads. Each one carries a single input path, a single
# question about that input, and a `modal_list` entry naming the input type.
video_payload = dict(
    paths=["assets/cat_and_chicken.mp4"],
    questions=["What animals are in the video, what are they doing, and how does the video feel?"],
    modal_list=["video"],
)
image_payload = dict(
    paths=["assets/sora.png"],
    questions=["What is the woman wearing, what is she doing, and how does the image feel?"],
    modal_list=["image"],
)

# Build the handler once and reuse it for both requests.
my_handler = EndpointHandler(path="DAMO-NLP-SG/VideoLLaMA2-8x7B")

# Invoke the handler on the video payload first, then on the image payload
# (tuple elements are evaluated left to right).
video_pred, image_pred = my_handler(video_payload), my_handler(image_payload)

# Report each prediction next to the name of the variable that holds it.
for label, prediction in (("video_pred", video_pred), ("image_pred", image_pred)):
    print(label, prediction)

# Expected Output Examples:
# video_pred [{'output': 'The video features a kitten and a baby chick playing together. The kitten is seen laying on the floor while the baby chick hops around. The two animals interact playfully with each other, and the video has a cute and heartwarming feel to it.'}]
# image_pred [{'output': 'The woman in the image is wearing a black coat and sunglasses, and she is walking down a rain-soaked city street. The image feels vibrant and lively, with the bright city lights reflecting off the wet pavement, creating a visually appealing atmosphere.'}]