import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import gradio as gr
import time

# Load the Imp multimodal model once at module import.  float16 halves the
# memory footprint; device_map="auto" places the weights on whatever
# accelerator is available (falls back to CPU).
model = AutoModelForCausalLM.from_pretrained(
    "MILVLG/imp-v1-3b",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("MILVLG/imp-v1-3b", trust_remote_code=True)


def response(USER_DATA, TOKEN) -> str:
    """Gradio callback: generate software test cases from uploaded UI images.

    Parameters
    ----------
    USER_DATA : dict
        Gradio multimodal payload. Reads "text" (the user's message) and
        "files" (a list of uploaded-file dicts, each with a "path" key).
    TOKEN
        Unused; kept so the Gradio callback signature stays unchanged.

    Returns
    -------
    str
        The model's concatenated per-image answers, terminated with a period.
    """
    print(USER_DATA)
    message = USER_DATA["text"]
    files = USER_DATA["files"]
    # Full chat-style prompt: system instructions + the user's message.
    SYSTEM_PROMPT = f""" A chat between a curious user and an artificial intelligence assistant. The assistant generates helpful, and detailed testcases for software/website testing.
You are tasked with generating detailed, step-by-step test cases for software functionality based on uploaded images. The user will provide one or more images of a software or website interface. For each image, generate a separate set of test cases following the format below:
Description: Provide a brief explanation of the functionality being tested, as inferred from the image.
Pre-conditions: Identify any setup requirements, dependencies, or conditions that must be met before testing can begin (e.g., user logged in, specific data pre-populated, etc.).
Testing Steps: Outline a clear, numbered sequence of actions that a user would take to test the functionality in the image.
Expected Result: Specify the expected outcome if the functionality is working as intended.
Ensure that:
Test cases are created independently for each image.
The functionality from each image is fully covered in its own set of test cases.
Any assumptions you make are clearly stated.
The focus is on usability, navigation, and feature correctness as demonstrated in the UI of the uploaded images.
USER: \n{message} ASSISTANT: """
    result = generate_answer(files, SYSTEM_PROMPT)
    # NOTE: the original bound a local named `response`, shadowing this
    # function itself; return directly instead.
    return f"{result}."


def generate_answer(IMAGES: list, SYSTEM_PROMPT) -> str:
    """Run the model once per uploaded image and join the answers.

    Parameters
    ----------
    IMAGES : list
        Uploaded-file dicts; each entry must carry a "path" key pointing at
        an image file on disk.
    SYSTEM_PROMPT : str
        The full prompt shared by every image.

    Returns
    -------
    str
        The generated completions, one per image, separated by blank lines.
    """
    print(len(IMAGES))
    # Tokenize the prompt once — it is identical for every image.
    # NOTE(review): with device_map="auto" the model weights may sit on an
    # accelerator while tokenizer output is on CPU; move the ids to
    # model.device so generate() doesn't mix devices — confirm on target HW.
    input_ids = tokenizer(SYSTEM_PROMPT, return_tensors="pt").input_ids.to(model.device)
    prompt_len = input_ids.shape[1]
    answers = []
    for each_img in IMAGES:
        # Context manager closes the image file handle instead of leaking it.
        with Image.open(each_img["path"]) as image:
            image_tensor = model.image_preprocess(image)
        output_ids = model.generate(
            inputs=input_ids,
            max_new_tokens=500,
            images=image_tensor,
            use_cache=False,
        )[0]
        # Drop the echoed prompt tokens; keep only the newly generated tail.
        answers.append(
            tokenizer.decode(output_ids[prompt_len:], skip_special_tokens=True).strip()
        )
    # BUG FIX: the original accumulated results with the literal text "/n/n"
    # (forward slashes) instead of real blank-line separators.
    return "\n\n".join(answers)