diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -7,788 +7,36 @@ import base64 import json import gradio as gr import numpy as np -from gradio import processing_utils -import requests -from packaging import version -from PIL import Image, ImageDraw +from PIL import Image import functools import emoji from langchain_community.chat_models import ChatOpenAI from langchain.schema import HumanMessage from caption_anything.model import CaptionAnything -from caption_anything.utils.image_editing_utils import create_bubble_frame from caption_anything.utils.utils import mask_painter, seg_model_map, prepare_segmenter, image_resize from caption_anything.utils.parser import parse_augment from caption_anything.captioner import build_captioner -from caption_anything.text_refiner import build_text_refiner from caption_anything.segmenter import build_segmenter -from chatbox import ConversationBot, build_chatbot_tools, get_new_image_name +from backend.chatbox import ConversationBot, build_chatbot_tools, get_new_image_name from segment_anything import sam_model_registry import easyocr import re -import edge_tts from langchain import __version__ -import torch -from transformers import AutoProcessor, SiglipModel -import faiss -from huggingface_hub import hf_hub_download -from datasets import load_dataset import pandas as pd import requests -import spaces -# Print the current version of LangChain -print(f"Current LangChain version: {__version__}") -print("testing testing") - - - -# import tts - -############################################################################### -############# this part is for 3D generate ############# -############################################################################### - - -# import spaces # -# import threading - -# lock = threading.Lock() import os -# import uuid -# from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler -# from diffusers.utils import export_to_video -# from safetensors.torch import load_file -#from diffusers.models.modeling_outputs import Transformer2DModelOutput - - -import random -import uuid import json -from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler - - - - -import imageio import numpy as np -import torch -import rembg from PIL import Image -from torchvision.transforms import v2 -from pytorch_lightning import seed_everything -from omegaconf import OmegaConf -from einops import rearrange, repeat -from tqdm import tqdm -from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler - -from src.utils.train_util import instantiate_from_config -from src.utils.camera_util import ( - FOV_to_intrinsics, - get_zero123plus_input_cameras, - get_circular_camera_poses, -) -from src.utils.mesh_util import save_obj, save_glb -from src.utils.infer_util import remove_background, resize_foreground, images_to_video - -import tempfile -from functools import partial - -from huggingface_hub import hf_hub_download - - - - -# def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False): -# """ -# Get the rendering camera parameters. 
-# """ -# c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation) -# if is_flexicubes: -# cameras = torch.linalg.inv(c2ws) -# cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1) -# else: -# extrinsics = c2ws.flatten(-2) -# intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2) -# cameras = torch.cat([extrinsics, intrinsics], dim=-1) -# cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1) -# return cameras - - -# def images_to_video(images, output_path, fps=30): -# # images: (N, C, H, W) -# os.makedirs(os.path.dirname(output_path), exist_ok=True) -# frames = [] -# for i in range(images.shape[0]): -# frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8).clip(0, 255) -# assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \ -# f"Frame shape mismatch: {frame.shape} vs {images.shape}" -# assert frame.min() >= 0 and frame.max() <= 255, \ -# f"Frame value out of range: {frame.min()} ~ {frame.max()}" -# frames.append(frame) -# imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264') - - -# ############################################################################### -# # Configuration. -# ############################################################################### - -# import shutil - -# def find_cuda(): -# # Check if CUDA_HOME or CUDA_PATH environment variables are set -# cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') - -# if cuda_home and os.path.exists(cuda_home): -# return cuda_home - -# # Search for the nvcc executable in the system's PATH -# nvcc_path = shutil.which('nvcc') - -# if nvcc_path: -# # Remove the 'bin/nvcc' part to get the CUDA installation path -# cuda_path = os.path.dirname(os.path.dirname(nvcc_path)) -# return cuda_path - -# return None - -# cuda_path = find_cuda() - -# if cuda_path: -# print(f"CUDA installation found at: {cuda_path}") -# else: -# print("CUDA installation not found") - -# config_path = 'configs/instant-nerf-base.yaml' -# config = OmegaConf.load(config_path) -# config_name = os.path.basename(config_path).replace('.yaml', '') -# model_config = config.model_config -# infer_config = config.infer_config - -# IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False - -# device = torch.device('cuda') - -# # load diffusion model -# print('Loading diffusion model ...') -# pipeline = DiffusionPipeline.from_pretrained( -# "sudo-ai/zero123plus-v1.2", -# custom_pipeline="zero123plus", -# torch_dtype=torch.float16, -# ) -# pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config( -# pipeline.scheduler.config, timestep_spacing='trailing' -# ) - -# # load custom white-background UNet -# unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model") -# state_dict = torch.load(unet_ckpt_path, map_location='cpu') -# pipeline.unet.load_state_dict(state_dict, strict=True) - -# pipeline = pipeline.to(device) - -# # load reconstruction model -# print('Loading reconstruction model ...') -# model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_nerf_base.ckpt", repo_type="model") -# model0 = instantiate_from_config(model_config) -# state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict'] -# state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k} -# model0.load_state_dict(state_dict, strict=True) - -# model0 = model0.to(device) - -# 
print('Loading Finished!') - - -# def check_input_image(input_image): -# if input_image is None: -# raise gr.Error("No image uploaded!") -# image = None -# else: -# image = Image.open(input_image) -# return image - -# def preprocess(input_image, do_remove_background): - -# rembg_session = rembg.new_session() if do_remove_background else None - -# if do_remove_background: -# input_image = remove_background(input_image, rembg_session) -# input_image = resize_foreground(input_image, 0.85) - -# return input_image - +from backend.prompts import generate_prompt +from backend.recommendation import RecommendationConfig, ImageRecommender +from backend.gpt_service import get_gpt_response, get_artistinfo, get_yearinfo +from backend.texttospeech.tts import texttospeech -# # @spaces.GPU -# def generate_mvs(input_image, sample_steps, sample_seed): +recommendation_config = RecommendationConfig() +recommender = ImageRecommender(recommendation_config) -# seed_everything(sample_seed) - -# # sampling -# z123_image = pipeline( -# input_image, -# num_inference_steps=sample_steps -# ).images[0] - -# show_image = np.asarray(z123_image, dtype=np.uint8) -# show_image = torch.from_numpy(show_image) # (960, 640, 3) -# show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2) -# show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3) -# show_image = Image.fromarray(show_image.numpy()) - -# return z123_image, show_image - - -# # @spaces.GPU -# def make3d(images): - -# global model0 -# if IS_FLEXICUBES: -# model0.init_flexicubes_geometry(device) -# model0 = model0.eval() - -# images = np.asarray(images, dtype=np.float32) / 255.0 -# images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float() # (3, 960, 640) -# images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2) # (6, 3, 320, 320) - -# input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device) -# render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device) - -# images = images.unsqueeze(0).to(device) -# images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1) - -# mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name -# print(mesh_fpath) -# mesh_basename = os.path.basename(mesh_fpath).split('.')[0] -# mesh_dirname = os.path.dirname(mesh_fpath) -# video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4") -# mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb") - -# with torch.no_grad(): -# # get triplane -# planes = model0.forward_planes(images, input_cameras) - -# # # get video -# # chunk_size = 20 if IS_FLEXICUBES else 1 -# # render_size = 384 - -# # frames = [] -# # for i in tqdm(range(0, render_cameras.shape[1], chunk_size)): -# # if IS_FLEXICUBES: -# # frame = model.forward_geometry( -# # planes, -# # render_cameras[:, i:i+chunk_size], -# # render_size=render_size, -# # )['img'] -# # else: -# # frame = model.synthesizer( -# # planes, -# # cameras=render_cameras[:, i:i+chunk_size], -# # render_size=render_size, -# # )['images_rgb'] -# # frames.append(frame) -# # frames = torch.cat(frames, dim=1) - -# # images_to_video( -# # frames[0], -# # video_fpath, -# # fps=30, -# # ) - -# # print(f"Video saved to {video_fpath}") - -# # get mesh -# mesh_out = model0.extract_mesh( -# planes, -# use_texture_map=False, -# **infer_config, -# ) - -# vertices, faces, vertex_colors = mesh_out -# vertices = vertices[:, [1, 2, 0]] - -# save_glb(vertices, faces, vertex_colors, 
mesh_glb_fpath) -# save_obj(vertices, faces, vertex_colors, mesh_fpath) - -# print(f"Mesh saved to {mesh_fpath}") - -# return mesh_fpath, mesh_glb_fpath - - -############################################################################### -############# above part is for 3D generate ############# -############################################################################### - - -############################################################################### -############# This part is for sCLIP ############# -############################################################################### - -# download model and dataset -hf_hub_download("merve/siglip-faiss-wikiart", "siglip_10k_latest.index", local_dir="./") -hf_hub_download("merve/siglip-faiss-wikiart", "wikiart_10k_latest.csv", local_dir="./") - -# read index, dataset and load siglip model and processor -index = faiss.read_index("./siglip_10k_latest.index") -df = pd.read_csv("./wikiart_10k_latest.csv") -device = torch.device('cuda' if torch.cuda.is_available() else "cpu") -processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224") -slipmodel = SiglipModel.from_pretrained("google/siglip-base-patch16-224").to(device) - - -def read_image_from_url(url): - response = requests.get(url) - img = Image.open(BytesIO(response.content)).convert("RGB") - return img - -#@spaces.GPU -def extract_features_siglip(image): - with torch.no_grad(): - inputs = processor(images=image, return_tensors="pt").to(device) - image_features = slipmodel.get_image_features(**inputs) - return image_features - -@spaces.GPU -def infer(crop_image_path,full_image_path,state,language,task_type=None): - print("task type",task_type) - style_gallery_output = [] - item_gallery_output=[] - - if task_type=="task 1": - item_gallery_output.append("recomendation_pic/1.8.jpg") - item_gallery_output.append("recomendation_pic/1.9.jpg") - input_image = Image.open(full_image_path).convert("RGB") - input_features = extract_features_siglip(input_image.convert("RGB")) - input_features = input_features.detach().cpu().numpy() - input_features = np.float32(input_features) - faiss.normalize_L2(input_features) - distances, indices = index.search(input_features, 2) - for i,v in enumerate(indices[0]): - sim = -distances[0][i] - image_url = df.iloc[v]["Link"] - img_retrieved = read_image_from_url(image_url) - style_gallery_output.append(img_retrieved) - if language=="English": - msg="🖼️ Please refer to the section below to see the recommended results." - else: - msg="🖼️ 请到下方查看推荐结果。" - state+=[(None,msg)] - - return item_gallery_output, style_gallery_output,state,state - elif task_type=="task 2": - item_gallery_output.append("recomendation_pic/2.8.jpg") - item_gallery_output.append("recomendation_pic/2.9.png") - input_image = Image.open(full_image_path).convert("RGB") - input_features = extract_features_siglip(input_image.convert("RGB")) - input_features = input_features.detach().cpu().numpy() - input_features = np.float32(input_features) - faiss.normalize_L2(input_features) - distances, indices = index.search(input_features, 2) - for i,v in enumerate(indices[0]): - sim = -distances[0][i] - image_url = df.iloc[v]["Link"] - img_retrieved = read_image_from_url(image_url) - style_gallery_output.append(img_retrieved) - if language=="English": - msg="🖼️ Please refer to the section below to see the recommended results." 
- else: - msg="🖼️ 请到下方查看推荐结果。" - state+=[(None,msg)] - - return item_gallery_output, style_gallery_output,state,state - - elif task_type=="task 3": - item_gallery_output.append("recomendation_pic/3.8.png") - item_gallery_output.append("recomendation_pic/basket-2.png") - input_image = Image.open(full_image_path).convert("RGB") - input_features = extract_features_siglip(input_image.convert("RGB")) - input_features = input_features.detach().cpu().numpy() - input_features = np.float32(input_features) - faiss.normalize_L2(input_features) - distances, indices = index.search(input_features, 2) - for i,v in enumerate(indices[0]): - sim = -distances[0][i] - image_url = df.iloc[v]["Link"] - img_retrieved = read_image_from_url(image_url) - style_gallery_output.append(img_retrieved) - if language=="English": - msg="🖼️ Please refer to the section below to see the recommended results." - else: - msg="🖼️ 请到下方查看推荐结果。" - state+=[(None,msg)] - - return item_gallery_output, style_gallery_output,state,state - - elif crop_image_path: - input_image = Image.open(crop_image_path).convert("RGB") - input_features = extract_features_siglip(input_image.convert("RGB")) - input_features = input_features.detach().cpu().numpy() - input_features = np.float32(input_features) - faiss.normalize_L2(input_features) - distances, indices = index.search(input_features, 2) - for i,v in enumerate(indices[0]): - sim = -distances[0][i] - image_url = df.iloc[v]["Link"] - img_retrieved = read_image_from_url(image_url) - item_gallery_output.append(img_retrieved) - - input_image = Image.open(full_image_path).convert("RGB") - input_features = extract_features_siglip(input_image.convert("RGB")) - input_features = input_features.detach().cpu().numpy() - input_features = np.float32(input_features) - faiss.normalize_L2(input_features) - distances, indices = index.search(input_features, 2) - for i,v in enumerate(indices[0]): - sim = -distances[0][i] - image_url = df.iloc[v]["Link"] - img_retrieved = read_image_from_url(image_url) - style_gallery_output.append(img_retrieved) - if language=="English": - msg="🖼️ Please refer to the section below to see the recommended results." - else: - msg="🖼️ 请到下方查看推荐结果。" - state+=[(None,msg)] - - return item_gallery_output, style_gallery_output,state,state - else: - input_image = Image.open(full_image_path).convert("RGB") - input_features = extract_features_siglip(input_image.convert("RGB")) - input_features = input_features.detach().cpu().numpy() - input_features = np.float32(input_features) - faiss.normalize_L2(input_features) - distances, indices = index.search(input_features, 4) - for i,v in enumerate(indices[0]): - sim = -distances[0][i] - image_url = df.iloc[v]["Link"] - img_retrieved = read_image_from_url(image_url) - style_gallery_output.append(img_retrieved) - if language=="English": - msg="🖼️ Please refer to the section below to see the recommended results." 
- else: - msg="🖼️ 请到下方查看推荐结果。" - state+=[(None,msg)] - - return item_gallery_output, style_gallery_output,state,state - - - -############################################################################### -############# Above part is for sCLIP ############# -############################################################################### - - -############################################################################### -############# this part is for text to image ############# -############################################################################### - -# # Use environment variables for flexibility -MODEL_ID = os.getenv("MODEL_ID", "sd-community/sdxl-flash") MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096")) -USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1" -ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1" -BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1")) # Allow generating multiple images at once - -# # Determine device and load model outside of function for efficiency -# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -# pipe = StableDiffusionXLPipeline.from_pretrained( -# MODEL_ID, -# torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, -# use_safetensors=True, -# add_watermarker=False, -# ).to(device) -# pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) - -# # Torch compile for potential speedup (experimental) -# if USE_TORCH_COMPILE: -# pipe.compile() - -# # CPU offloading for larger RAM capacity (experimental) -# if ENABLE_CPU_OFFLOAD: -# pipe.enable_model_cpu_offload() - -MAX_SEED = np.iinfo(np.int32).max - -# def save_image(img): -# unique_name = str(uuid.uuid4()) + ".png" -# img.save(unique_name) -# return unique_name - -# def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: -# if randomize_seed: -# seed = random.randint(0, MAX_SEED) -# return seed - -# @spaces.GPU(duration=30, queue=False) -# def generate( -# prompt: str, -# negative_prompt: str = "", -# use_negative_prompt: bool = False, -# seed: int = 1, -# width: int = 200, -# height: int = 200, -# guidance_scale: float = 3, -# num_inference_steps: int = 30, -# randomize_seed: bool = False, -# num_images: int = 4, # Number of images to generate -# use_resolution_binning: bool = True, -# progress=gr.Progress(track_tqdm=True), -# ): -# seed = int(randomize_seed_fn(seed, randomize_seed)) -# generator = torch.Generator(device=device).manual_seed(seed) - -# # Improved options handling -# options = { -# "prompt": [prompt] * num_images, -# "negative_prompt": [negative_prompt] * num_images if use_negative_prompt else None, -# "width": width, -# "height": height, -# "guidance_scale": guidance_scale, -# "num_inference_steps": num_inference_steps, -# "generator": generator, -# "output_type": "pil", -# } - -# # Use resolution binning for faster generation with less VRAM usage -# # if use_resolution_binning: -# # options["use_resolution_binning"] = True - -# # Generate images potentially in batches -# images = [] -# for i in range(0, num_images, BATCH_SIZE): -# batch_options = options.copy() -# batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE] -# if "negative_prompt" in batch_options: -# batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE] -# images.extend(pipe(**batch_options).images) - -# image_paths = [save_image(img) for img in images] -# return image_paths, seed - -# examples = [ -# "a cat eating a piece of cheese", -# "a ROBOT riding a BLUE horse on Mars, 
photorealistic, 4k", -# "Ironman VS Hulk, ultrarealistic", -# "Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k", -# "An alien holding a sign board containing the word 'Flash', futuristic, neonpunk", -# "Kids going to school, Anime style" -# ] - - - - -############################################################################### -############# above part is for text to image ############# -############################################################################### - - -print("4") - -css = """ -#warning {background-color: #FFCCCB} -.tools_button { - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - background: white; - border: none !important; - box-shadow: none !important; - text-align: center; - color: black; -} - -.tools_button_clicked { - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - background: white; - border: none !important; - box-shadow: none !important; - text-align: center; - color: rgb(18,150,219); -} - -.tools_button_add { - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - background: white; - border: none !important; - box-shadow: none !important; - text-align: center; - color: rgb(18,150,219); -} - - -.info_btn { - background: rgb(245, 245, 245) !important; - border: none !important; - box-shadow: none !important; - font-size: 15px !important; - min-width: 6rem !important; - max-width: 10rem !important; -} - -.info_btn_interact { - background: rgb(217, 217, 217) !important; - box-shadow: none !important; - font-size: 15px !important; - min-width: 6rem !important; - max-width: 10rem !important; -} - -.function_button { - background: rgb(227, 226, 226) !important; - border: none !important; - box-shadow: none !important; -} - -.function_button_rec { - background: rgb(189, 189, 189) !important; - border: none !important; - box-shadow: none !important; -} - -.small_button { - font-size: 12px !important; - padding: 2px 8px !important; - min-width: 60px !important; - height: 30px !important; -} - -#tool_box {max-width: 50px} - -""" -filtered_language_dict = { - 'English': {'female': 'en-US-JennyNeural', 'male': 'en-US-GuyNeural'}, - 'Chinese': {'female': 'zh-CN-XiaoxiaoNeural', 'male': 'zh-CN-YunxiNeural'}, - 'French': {'female': 'fr-FR-DeniseNeural', 'male': 'fr-FR-HenriNeural'}, - 'Spanish': {'female': 'es-MX-DaliaNeural', 'male': 'es-MX-JorgeNeural'}, - 'Arabic': {'female': 'ar-SA-ZariyahNeural', 'male': 'ar-SA-HamedNeural'}, - 'Portuguese': {'female': 'pt-BR-FranciscaNeural', 'male': 'pt-BR-AntonioNeural'}, - 'Cantonese': {'female': 'zh-HK-HiuGaaiNeural', 'male': 'zh-HK-WanLungNeural'} -} - -focus_map = { -"Describe":0, -"D+Analysis":1, -"DA+Interprete":2, -"Judge":3 -} - - - -# prompt_list = [ -# [ -# 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.', -# 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact that describes the theme and content you see, and one analysis of the techniques used in the work (shape, color, texture, form principles) as markdown outline with appropriate emojis. 
Each point listed is to be in {language} language, with a response length of about {length} words.', -# "Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact that describes the theme and content, one analysis of the techniques used in the work (shape, color, texture, form principles), and one interpretation that explores the deeper meaning and the artist's intentions (thoughts, emotions, concepts) as a markdown outline with appropriate emojis. Each point listed is to be in {language} language, with a response length of about {length} words.", -# 'Wiki_caption: {Wiki_caption}, You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.' -# ], -# [ -# "When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", -# "When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact and one analysis of the techniques used in the work (shape, color, texture, form principles) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", -# "When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact, one analysis of the techniques used in the work (shape, color, texture, form principles), and one interpretation that explores the deeper meaning and the artist's intentions (thoughts, emotions, concepts) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. 
Each point listed is to be in {language} language, with a response length of about {length} words.", -# "When generating the answer, you should tell others that you are one of the creators of these paintings and generate the text in the tone and manner as if you are the creator of the painting. According to image and wiki_caption {Wiki_caption}, You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", -# ], -# [ -# 'When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object and start every sentence with I. Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.', -# "When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis of the techniques used in the work (shape, color, texture, form principles). Each point should be formatted as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object and start every sentence with I. Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", -# "When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis of the techniques used in the work (shape, color, texture, form principles) and and one interpretation that explores the deeper meaning and the artist's intentions (thoughts, emotions, concepts). Each point should be formatted as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object and start every sentence with I. Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. 
Each point listed is to be in {language} language, with a response length of about {length} words.", -# 'When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. According to image and wiki_caption {Wiki_caption}, You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.', -# ] -# ] - -prompt_list = [ - [ - 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.', - 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.', - 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.', - 'Wiki_caption: {Wiki_caption}, You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.' - ], - [ - "When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", - "When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. 
You have to help me understand what is about the selected object and list one fact and one analysis from art appreciation perspective as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", - "When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact, one analysis, and one interpret from art appreciation perspective as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", - "When generating the answer, you should tell others that you are one of the creators of these paintings and generate the text in the tone and manner as if you are the creator of the painting. According to image and wiki_caption {Wiki_caption}, You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.", - ], - [ - 'When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object and start every sentence with I. Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.', - 'When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis from art appreciation perspective as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object and start every sentence with I. 
Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.', - 'When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis from art appreciation perspective and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object and start every sentence with I. Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.', - 'When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. According to image and wiki_caption {Wiki_caption}, You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Please generate the above points in the tone and manner as if you are the object of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.', - ] -] - -recommendation_prompt=[ - - [ - ''' - First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting: - Recommendation reason: {{Recommendation based on {{object}} in the painting you saw earlier. Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate in three points. }} - Each bullet point should be in {language} language, with a response length of about {length} words. - ''', - ''' - When generating answers, you should tell people that I am the creator of painting you were looking at earlier itself, and generate text in the tone and manner in which you are the creator of painting were looking at earlier. - - First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting: - - Recommendation reason: {{I'm the creator of that painting you saw earlier. I'm an artist. and I'm recommending this painting based on the fact that the {{object}} I've drawn also appear in the painting you're looking at. }} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. 
Please generate the three points in the tone and manner as if you are the creator of painting were looking at earlier and start every sentence with I. - - Each bullet point should be in {language} language, with a response length of about {length} words. - - ''', - ''' - When generating answers, you should tell people that you are the object itself that was selected in the painting, and generate text in the tone and manner in which you are the object - - First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting: - - Recommendation reason: {{I'm the {{object}} in the painting you were looking at earlier, and I'm recommending this painting based on the fact that I'm also present in the one you're looking at.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the object of this painting and start every sentence with I. - - Each bullet point should be in {language} language, with a response length of about {length} words. - - '''], - - [ - ''' - First identify what the name of the first painting is, you save yourself as the parameter: {{name}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting: - Recommendation reason: {{Recommendation based on the painting {{name}}.Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate in three points.}} - Each bullet point should be in {language} language, with a response length of about {length} words. - ''', - ''' - When generating answers, you should tell people that I am the creator of painting you were looking at earlier itself, and generate text in the tone and manner in which you are the creator of painting were looking at earlier. - - First identify what the creator of the first painting is, you save yourself as the parameter: {artist}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting: - - Recommendation reason: {{I'm the creator of that painting you saw earlier, {artist}. I'm an artist. and I'm recommending this painting based on the fact that the painting you're looking at is similar to the one you just saw of me.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the creator of painting were looking at earlier and start every sentence with I. - - Each bullet point should be in {language} language, with a response length of about {length} words. 
- - ''', - ''' - When generating answers, you should tell people that I am the painting you were looking at earlier itself, and generate text in the tone and manner in which you are the painting were looking at earlier. - - First identify what the name of the first painting is, you save yourself as the parameter: {{name}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting: - - Recommendation reason: {{I'm the painting {{name}} you were looking at earlier, and I'm recommending this painting based on the fact that I'm similar to the one you're looking at.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the painting were looking at earlier and start every sentence with I. - - Each bullet point should be in {language} language, with a response length of about {length} words. - - '''], - - - - -] - -gpt_state = 0 -VOICE = "en-GB-SoniaNeural" -article = """ -
By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml
-Gradio demo for EyeSee Anything in Art, image to dense captioning generation with various language styles. To use it, simply upload your image, or click one of the examples to load them. """ examples = [ - ["test_images/1.The Ambassadors.jpg","test_images/task1.jpg","task 1"], - ["test_images/2.Football Players.jpg","test_images/task2.jpg","task 2"], - ["test_images/3-square.jpg","test_images/task3.jpg","task 3"], - # ["test_images/test4.jpg"], - # ["test_images/test5.jpg"], - # ["test_images/Picture5.png"], - - ] + ["assets/test_images/1.The Ambassadors.jpg","assets/test_images/task1.jpg","task 1"], + ["assets/test_images/2.Football Players.jpg","assets/test_images/task2.jpg","task 2"], + ["assets/test_images/3-square.jpg","assets/test_images/task3.jpg","task 3"]] with gr.Blocks( css=css, @@ -1910,7 +671,7 @@ def create_ui(): ) as iface: #display in the chatbox state = gr.State([]) - # expoer in log + # export in log log_state=gr.State([]) # history log for gpt history_log=gr.State([]) @@ -1929,7 +690,6 @@ def create_ui(): input_mask_state = gr.State(np.zeros((1, 1))) input_points_state = gr.State([]) input_labels_state = gr.State([]) - #store the selected image new_crop_save_path = gr.State(None) image_input_nobackground = gr.State(None) artist=gr.State(None) @@ -1938,17 +698,12 @@ def create_ui(): point_prompt = gr.State("Positive") log_list=gr.State([]) gender=gr.State('female') - # store the whole image path image_path=gr.State('') pic_index=gr.State(None) - recomended_state=gr.State([]) - + recomended_state=gr.State([]) recomended_path=gr.State(None) recomended_type=gr.State(None) - - - with gr.Row(): with gr.Column(scale=6): @@ -1970,21 +725,7 @@ def create_ui(): label="Check to autoplay audio", value=False, elem_classes="custom-autoplay",visible=False) output_audio = gr.HTML( label="Synthesised Audio", elem_classes="custom-output", visible=False) - with gr.Tab("Base(GPT Power)",visible=False) as base_tab: - image_input_base = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650) - with gr.Row(): - name_label_base = gr.Button(value="Name: ",elem_classes="info_btn") - artist_label_base = gr.Button(value="Artist: ",elem_classes="info_btn_interact") - year_label_base = gr.Button(value="Year: ",elem_classes="info_btn_interact") - material_label_base = gr.Button(value="Style: ",elem_classes="info_btn") - with gr.Tab("Base2",visible=False) as base_tab2: - image_input_base_2 = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650) - with gr.Row(): - name_label_base2 = gr.Button(value="Name: ",elem_classes="info_btn") - artist_label_base2 = gr.Button(value="Artist: ",elem_classes="info_btn_interact") - year_label_base2 = gr.Button(value="Year: ",elem_classes="info_btn_interact") - material_label_base2 = gr.Button(value="Style: ",elem_classes="info_btn") with gr.Row(): with gr.Column(scale=1,min_width=50,visible=False) as instruct: @@ -2009,26 +750,10 @@ def create_ui(): focus_dda = gr.Button(value="Judge",interactive=True,elem_classes="function_button") recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button_rec") - # focus_asso = gr.Button(value="Associate",interactive=True,elem_classes="function_button",variant="primary") + - - with gr.Tab("Trajectory (beta)", visible=False) as traj_tab: - # sketcher_input = ImageSketcher(type="pil", interactive=True, brush_radius=10, - # elem_id="image_sketcher") - sketcher_input = gr.ImageEditor(type="pil", interactive=True - ) - with gr.Row(): - name_label_traj = 
gr.Button(value="Name: ") - artist_label_traj = gr.Button(value="Artist: ") - year_label_traj = gr.Button(value="Year: ") - material_label_traj = gr.Button(value="Material: ") - # example_image_traj = gr.Image(type="pil", interactive=False, visible=False) - with gr.Row(): - clear_button_sketcher = gr.Button(value="Clear Sketch", interactive=True) - submit_button_sketcher = gr.Button(value="Submit", interactive=True) - with gr.Column(visible=False,scale=4) as modules_need_gpt1: with gr.Row(visible=False): sentiment = gr.Radio( @@ -2044,15 +769,7 @@ def create_ui(): label="Factuality", interactive=True, ) - # length = gr.Slider( - # minimum=10, - # maximum=80, - # value=10, - # step=1, - # interactive=True, - # label="Generated Caption Length", - # ) - # 是否启用wiki内容整合到caption中 + enable_wiki = gr.Radio( choices=["Yes", "No"], value="No", @@ -2068,7 +785,7 @@ def create_ui(): with gr.Column(scale=4): with gr.Column(visible=True) as module_key_input: openai_api_key = gr.Textbox( - value="sk-proj-bxHhgjZV8TVgd1IupZrUT3BlbkFJvrthq6zIxpZVk3vwsvJ9", + value="", placeholder="Input openAI API key", show_label=False, label="OpenAI API Key", @@ -2076,15 +793,10 @@ def create_ui(): type="password") with gr.Row(): enable_chatGPT_button = gr.Button(value="Run with ChatGPT", interactive=True, variant='primary') - # disable_chatGPT_button = gr.Button(value="Run without ChatGPT (Faster)", interactive=True, - # variant='primary') + with gr.Column(visible=False) as module_notification_box: notification_box = gr.Textbox(lines=1, label="Notification", max_lines=5, show_label=False) - - # with gr.Column() as modules_need_gpt0: - # with gr.Column(visible=False) as modules_need_gpt2: - # paragraph_output = gr.Textbox(lines=16, label="Describe Everything", max_lines=16) - # cap_everything_button = gr.Button(value="Caption Everything in a Paragraph", interactive=True) + with gr.Column(visible=False) as modules_not_need_gpt2: with gr.Blocks(): chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=750,bubble_full_width=False) @@ -2103,72 +815,23 @@ def create_ui(): scale=5, interactive=True) - - - - - - - - - - # TTS interface hidden initially - with gr.Column(visible=False) as tts_interface: - input_text = gr.Textbox(label="Text Prompt", value="Hello, World !, here is an example of light voice cloning. 
Try to upload your best audio samples quality") - input_language = gr.Dropdown(label="Language", choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn"], value="en") - input_audio = gr.Audio(label="Reference Audio", type="filepath", value="examples/female.wav") - input_mic = gr.Audio(sources="microphone", type="filepath", label="Use Microphone for Reference") - use_mic = gr.Checkbox(label="Check to use Microphone as Reference", value=False) - agree = gr.Checkbox(label="Agree", value=True) - output_waveform = gr.Video(label="Waveform Visual") - # output_audio = gr.HTML(label="Synthesised Audio") - - with gr.Row(): - submit_tts = gr.Button(value="Submit", interactive=True) - clear_tts = gr.Button(value="Clear", interactive=True) with gr.Row(): with gr.Column(scale=6): with gr.Row(): with gr.Column(visible=False) as recommend: - - # sort_rec=gr.Dropdown(["1", "2", "3", "4"], visible=False, - # value=[], - # multiselect=True, - # label="Score", info="Please sort the pictures according to your preference" - # ) gallery_result = gr.Gallery( label="Object-based Recommendation", height="auto", columns=2, - interactive=False - # columns=4, - # rows=2, - # show_label=False, - # allow_preview=True, - # object_fit="contain", - # height="auto", - # preview=True, - # show_share_button=True, - # show_download_button=True - ) + interactive=False) style_gallery_result = gr.Gallery( label="Style-based Recommendation", height="auto", columns=2, - interactive=False - # columns=4, - # rows=2, - # show_label=False, - # allow_preview=True, - # object_fit="contain", - # height="auto", - # preview=True, - # show_share_button=True, - # show_download_button=True - ) + interactive=False) with gr.Column(scale=3,visible=False) as reco_preview: selected_image = gr.Image(label="Selected Image", interactive=False) @@ -2198,109 +861,7 @@ def create_ui(): examples=examples, inputs=[example_image,task_instuction,task_type], ) - - - - - - ############################################################################### - ############# this part is for text to image ############# - ############################################################################### - - with gr.Row(variant="panel",visible=False) as text2image_model: - - with gr.Column(): - with gr.Column(): - gr.Radio(["Other Paintings by the Artist"], label="Artist", info="Who is the artist?🧑🎨"), - gr.Radio(["Oil Painting","Printmaking","Watercolor Painting","Drawing"], label="Art Forms", info="What are the art forms?🎨"), - gr.Radio(["Renaissance", "Baroque", "Impressionism","Modernism"], label="Period", info="Which art period?⏳"), - # to be done - gr.Dropdown( - ["ran", "swam", "ate", "slept"], value=["swam", "slept"], multiselect=True, label="Items", info="Which items are you interested in?" 
- ) - - with gr.Row(): - prompt = gr.Text( - label="Prompt", - show_label=False, - max_lines=1, - placeholder="Enter your prompt", - container=False, - ) - run_button = gr.Button("Run") - - with gr.Accordion("Advanced options", open=False): - num_images = gr.Slider( - label="Number of Images", - minimum=1, - maximum=4, - step=1, - value=4, - ) - with gr.Row(): - use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True) - negative_prompt = gr.Text( - label="Negative prompt", - max_lines=5, - lines=4, - placeholder="Enter a negative prompt", - value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, NSFW", - visible=True, - ) - seed = gr.Slider( - label="Seed", - minimum=0, - maximum=MAX_SEED, - step=1, - value=0, - ) - randomize_seed = gr.Checkbox(label="Randomize seed", value=True) - with gr.Row(): - width = gr.Slider( - label="Width", - minimum=100, - maximum=MAX_IMAGE_SIZE, - step=64, - value=1024, - ) - height = gr.Slider( - label="Height", - minimum=100, - maximum=MAX_IMAGE_SIZE, - step=64, - value=1024, - ) - with gr.Row(): - guidance_scale = gr.Slider( - label="Guidance Scale", - minimum=0.1, - maximum=6, - step=0.1, - value=3.0, - ) - num_inference_steps = gr.Slider( - label="Number of inference steps", - minimum=1, - maximum=15, - step=1, - value=8, - ) - # with gr.Column(): - # result = gr.Gallery( - # label="Result", - # height="auto", - # columns=4 - # # columns=4, - # # rows=2, - # # show_label=False, - # # allow_preview=True, - # # object_fit="contain", - # # height="auto", - # # preview=True, - # # show_share_button=True, - # # show_download_button=True - # ) - + with gr.Row(visible=False) as export: chat_log_file = gr.File(label="Download Chat Log",scale=5) @@ -2321,59 +882,15 @@ def create_ui(): interactive=True, label="Generated Caption Length", ) - - # auto_play = gr.Checkbox( - # label="Check to autoplay audio", value=False, elem_classes="custom-autoplay" - # ) - # output_audio = gr.HTML( - # label="Synthesised Audio", elem_classes="custom-output" - # ) - - - # gr.Examples( - # examples=examples, - # inputs=prompt, - # cache_examples=False - # ) - - use_negative_prompt.change( - fn=lambda x: gr.update(visible=x), - inputs=use_negative_prompt, - outputs=negative_prompt, - api_name=False, - ) - - # gr.on( - # triggers=[ - # prompt.submit, - # negative_prompt.submit, - # run_button.click, - # ], - # fn=generate, - # inputs=[ - # prompt, - # negative_prompt, - # use_negative_prompt, - # seed, - # width, - # height, - # guidance_scale, - # num_inference_steps, - # randomize_seed, - # num_images - # ], - # outputs=[result, seed], - # api_name="run", - # ) recommend_btn.click( - fn=infer, + fn=recommender.infer, inputs=[new_crop_save_path,image_path,state,language,task_type], outputs=[gallery_result,style_gallery_result,chatbot,state] ) gallery_result.select( - item_associate, + recommender.item_associate, inputs=[new_crop_save_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state], outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score,selected_image,recomended_path, recomended_type], @@ -2381,160 +898,18 @@ def create_ui(): ) style_gallery_result.select( - style_associate, + recommender.style_associate, 
inputs=[image_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state,artist_label], - outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score,selected_image,recomended_path,recomended_type], - - - ) + outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score,selected_image,recomended_path,recomended_type]) selected_image.select( get_recommendation, inputs=[new_crop_save_path,image_path, openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state,recomended_type,artist_label,recomended_path], outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score]) - ############################################################################### - ############# above part is for text to image ############# - ############################################################################### - - - ############################################################################### - # this part is for 3d generate. - ############################################################################### - - # with gr.Row(variant="panel",visible=False) as d3_model: - # with gr.Column(): - # with gr.Row(): - # input_image = gr.Image( - # label="Input Image", - # image_mode="RGBA", - # sources="upload", - # #width=256, - # #height=256, - # type="pil", - # elem_id="content_image", - # ) - # processed_image = gr.Image( - # label="Processed Image", - # image_mode="RGBA", - # #width=256, - # #height=256, - # type="pil", - # interactive=False - # ) - # with gr.Row(): - # with gr.Group(): - # do_remove_background = gr.Checkbox( - # label="Remove Background", value=True - # ) - # sample_seed = gr.Number(value=42, label="Seed Value", precision=0) - - # sample_steps = gr.Slider( - # label="Sample Steps", - # minimum=30, - # maximum=75, - # value=75, - # step=5 - # ) - - # with gr.Row(): - # submit = gr.Button("Generate", elem_id="generate", variant="primary") - - # with gr.Row(variant="panel"): - # gr.Examples( - # examples=[ - # os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples")) - # ], - # inputs=[input_image], - # label="Examples", - # cache_examples=False, - # examples_per_page=16 - # ) - - # with gr.Column(): - - # with gr.Row(): - - # with gr.Column(): - # mv_show_images = gr.Image( - # label="Generated Multi-views", - # type="pil", - # width=379, - # interactive=False - # ) - - # # with gr.Column(): - # # output_video = gr.Video( - # # label="video", format="mp4", - # # width=379, - # # autoplay=True, - # # interactive=False - # # ) - - # with gr.Row(): - # with gr.Tab("OBJ"): - # output_model_obj = gr.Model3D( - # label="Output Model (OBJ Format)", - # interactive=False, - # ) - # gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.") - # with gr.Tab("GLB"): - # output_model_glb = gr.Model3D( - # label="Output Model (GLB Format)", - # interactive=False, - # ) - # gr.Markdown("Note: The model shown here has a darker appearance. 
Download to get correct results.") - - - - - # mv_images = gr.State() chatbot.like(print_like_dislike, inputs=[state,log_state], outputs=[log_state,chatbot]) - - # submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success( - # fn=generate_mvs, - # inputs=[processed_image, sample_steps, sample_seed], - # outputs=[mv_images, mv_show_images] - - # ).success( - # fn=make3d, - # inputs=[mv_images], - # outputs=[output_model_obj, output_model_glb] - # ) - - ############################################################################### - # above part is for 3d generate. - ############################################################################### - - - def clear_tts_fields(): - return [gr.update(value=""), gr.update(value=""), None, None, gr.update(value=False), gr.update(value=True), None, None] - - # submit_tts.click( - # tts.predict, - # inputs=[input_text, input_language, input_audio, input_mic, use_mic, agree], - # outputs=[output_waveform, output_audio], - # queue=True - # ) - - clear_tts.click( - clear_tts_fields, - inputs=None, - outputs=[input_text, input_language, input_audio, input_mic, use_mic, agree, output_waveform, output_audio], - queue=False - ) - - - - - # clear_button_sketcher.click( - # lambda x: (x), - # [origin_image], - # [sketcher_input], - # queue=False, - # show_progress=False - # ) + recommend_score.select( get_recommendationscore, @@ -2542,73 +917,27 @@ def create_ui(): outputs=[log_state], ) - - - - + openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key], outputs=[export, modules_need_gpt1, modules_need_gpt3, modules_not_need_gpt, - modules_not_need_gpt2, tts_interface, module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row,recommend,reco_reasons,instruct,modules_not_need_gpt3,sort_rec,reco_preview]) + modules_not_need_gpt2, module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row,recommend,reco_reasons,instruct,modules_not_need_gpt3,sort_rec,reco_preview]) enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key], outputs=[export,modules_need_gpt1, modules_need_gpt3, modules_not_need_gpt, - modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row,recommend,reco_reasons,instruct,modules_not_need_gpt3,sort_rec,reco_preview]) - - # disable_chatGPT_button.click(init_wo_openai_api_key, - # outputs=[export,modules_need_gpt1, modules_need_gpt3, - # modules_not_need_gpt, - # modules_not_need_gpt2, tts_interface,module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row]) + modules_not_need_gpt2,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row,recommend,reco_reasons,instruct,modules_not_need_gpt3,sort_rec,reco_preview]) - # artist_label_base2.click( - # get_artistinfo, - # inputs=[artist_label_base2,openai_api_key,state,language,auto_play,length], - # outputs=[chatbot,state,output_audio] - # ) artist_label.click( get_artistinfo, inputs=[artist_label,openai_api_key,state,language,auto_play,length,log_state], outputs=[chatbot,state,output_audio,log_state] ) - # artist_label_traj.click( - # get_artistinfo, - # inputs=[artist_label_traj,openai_api_key,state,language,auto_play,length], - # outputs=[chatbot,state,output_audio] - # ) - - # year_label_base2.click( - # get_yearinfo, - # inputs=[year_label_base2,openai_api_key,state,language,auto_play,length], - # 
outputs=[chatbot,state,output_audio] - # ) + year_label.click( get_yearinfo, inputs=[year_label,openai_api_key,state,language,auto_play,length,log_state], outputs=[chatbot,state,output_audio,log_state] ) - # year_label_traj.click( - # get_yearinfo, - # inputs=[year_label_traj,openai_api_key,state,language,auto_play,length], - # outputs=[chatbot,state,output_audio] - # ) - - - # enable_chatGPT_button.click( - # lambda: (None, [], [], [[], [], []], "", "", ""), - # [], - # [image_input, chatbot, state, click_state, paragraph_output, origin_image], - # queue=False, - # show_progress=False - # ) - # openai_api_key.submit( - # lambda: (None, [], [], [[], [], []], "", "", ""), - # [], - # [image_input, chatbot, state, click_state, paragraph_output, origin_image], - # queue=False, - # show_progress=False - # ) - - # cap_everything_button.click(cap_everything, [paragraph, visual_chatgpt, language,auto_play], - # [paragraph_output,output_audio]) + def reset_and_add(origin_image): new_prompt = "Positive" new_add_icon = "assets/icons/plus-square-blue.png" @@ -2625,14 +954,7 @@ def create_ui(): show_progress=False ) clear_button_click.click(functools.partial(clear_chat_memory, keep_global=True), inputs=[visual_chatgpt]) - # clear_button_image.click( - # lambda: (None, [], [], [[], [], []], "", "", ""), - # [], - # [image_input, chatbot, state, click_state, paragraph, origin_image], - # queue=False, - # show_progress=False - # ) - # clear_button_image.click(clear_chat_memory, inputs=[visual_chatgpt]) + clear_button_text.click( lambda: ([], [], [[], [], [], []],[]), [], @@ -2651,52 +973,21 @@ def create_ui(): ) image_input.clear(clear_chat_memory, inputs=[visual_chatgpt]) - - # image_input.change( - # lambda: ([], [], [[], [], []], [], []), - # [], - # [chatbot, state, click_state, history_log, log_state], - # queue=False, - # show_progress=False - # ) - - - - - # image_input_base.upload(upload_callback, [image_input_base, state, visual_chatgpt,openai_api_key,language,naritive], - # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2, - # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \ - # name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \ - # paragraph,artist,gender,image_path]) - - # image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key,language,naritive], - # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2, - # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \ - # name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \ - # paragraph,artist,gender,image_path]) + image_input.upload(upload_callback, [image_input, state, log_state,task_type, visual_chatgpt,openai_api_key,language,naritive,history_log,auto_play,session_type], - [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2, - image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \ - 
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \ - paragraph,artist,gender,image_path,log_state,history_log,output_audio]) + [chatbot, state, origin_image, click_state, image_input,image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,\ + paragraph,artist,gender,image_path,log_state,history_log,output_audio]) - # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key], - # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2, - # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \ - # name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \ - # paragraph,artist]) chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play,gender,openai_api_key,image_path,log_state,history_log,naritive], [chatbot, state, aux_state,output_audio,log_state,history_log]) - # chat_input.submit(lambda: "", None, chat_input) + chat_input.submit(lambda: {"text": ""}, None, chat_input) example_image.change(upload_callback, [example_image, state, log_state, task_type, visual_chatgpt, openai_api_key,language,naritive,history_log,auto_play,session_type], - [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2, - image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \ - name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \ + [chatbot, state, origin_image, click_state, image_input, image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,\ paragraph,artist,gender,image_path, log_state,history_log,output_audio]) example_image.change(clear_chat_memory, inputs=[visual_chatgpt]) @@ -2704,41 +995,7 @@ def create_ui(): lambda:([],[],[],None,[],gr.update(value="Preview")), [], [gallery_result,style_gallery_result,recommend_bot,new_crop_save_path,chatbot,recommend_type]) - - # def on_click_tab_selected(): - # if gpt_state ==1: - # print(gpt_state) - # print("using gpt") - # return [gr.update(visible=True)]*2+[gr.update(visible=False)]*2 - # else: - # print("no gpt") - # print("gpt_state",gpt_state) - # return [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 - - # def on_base_selected(): - # if gpt_state ==1: - # print(gpt_state) - # print("using gpt") - # return [gr.update(visible=True)]*2+[gr.update(visible=False)]*2 - # else: - # print("no gpt") - # return [gr.update(visible=False)]*4 - - # traj_tab.select(on_click_tab_selected, outputs=[modules_need_gpt1,modules_not_need_gpt2,modules_need_gpt0,modules_need_gpt2]) - # click_tab.select(on_click_tab_selected, outputs=[modules_need_gpt1,modules_not_need_gpt2,modules_need_gpt0,modules_need_gpt2]) - # base_tab.select(on_base_selected, outputs=[modules_need_gpt0,modules_need_gpt2,modules_not_need_gpt2,modules_need_gpt1]) - # base_tab2.select(on_base_selected, outputs=[modules_not_need_gpt2,modules_not_need_gpt2,modules_need_gpt0,modules_need_gpt1]) - - def 
print_reason(): - print("reason") - - - - - - - image_input.select( inference_click, inputs=[ @@ -2830,19 +1087,6 @@ def create_ui(): ) - - - - - # submit_button_sketcher.click( - # inference_traject, - # inputs=[ - # origin_image,sketcher_input, enable_wiki, language, sentiment, factuality, length, image_embedding, state, - # original_size, input_size, text_refiner,focus_type_sketch,paragraph,openai_api_key,auto_play,Input_sketch - # ], - # outputs=[chatbot, state, sketcher_input,output_audio,new_crop_save_path], - # show_progress=False, queue=True - # ) export_button.click( export_chat_log, @@ -2869,21 +1113,6 @@ def create_ui(): [], [image_input, chatbot, state, click_state, paragraph, origin_image,history_log,log_state,task_instuction,task_type,gallery_result,style_gallery_result,recommend_bot] ) - - # upvote_btn.click( - # handle_liked, - # inputs=[state,like_res], - # outputs=[chatbot,like_res] - # ) - - # downvote_btn.click( - # handle_disliked, - # inputs=[state,dislike_res], - # outputs=[chatbot,dislike_res] - # ) - - - return iface @@ -2891,6 +1120,5 @@ def create_ui(): if __name__ == '__main__': print("main") iface = create_ui() - # iface.queue(api_open=False, max_size=10) - iface.queue(api_open=False, max_size=40) + iface.queue(api_open=False, max_size=10) iface.launch(server_name="0.0.0.0") \ No newline at end of file
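# ---------------------------------------------------------------------------
# Minimal sketch of the recommendation-gallery wiring that the hunks above
# move onto a `recommender` object (recommender.infer / item_associate /
# style_associate): a button fills two gr.Gallery components and each
# Gallery.select() handler receives a gr.SelectData event. The Recommender
# class, its simplified signatures, and the random tiles below are
# hypothetical stand-ins only; the real callbacks in create_ui() take many
# more inputs and also update the chatbot and log state.
import numpy as np
import gradio as gr


class Recommender:
    """Hypothetical stand-in for the recommender used in app.py."""

    def _tiles(self, seed):
        rng = np.random.default_rng(seed)
        # Two random RGB tiles per gallery, each paired with a caption.
        return [
            (rng.integers(0, 256, (128, 128, 3), dtype=np.uint8), f"item {i}")
            for i in range(2)
        ]

    def infer(self, query):
        # Fill the object-based and style-based galleries for a query.
        return self._tiles(0), self._tiles(1)

    def item_associate(self, evt: gr.SelectData):
        # evt.index identifies which gallery item was clicked.
        return f"Object-based pick: item #{evt.index}"

    def style_associate(self, evt: gr.SelectData):
        return f"Style-based pick: item #{evt.index}"


recommender = Recommender()

with gr.Blocks() as demo_reco:
    query = gr.Textbox(label="Query")
    recommend_btn = gr.Button("Recommend")
    gallery_result = gr.Gallery(label="Object-based Recommendation",
                                columns=2, interactive=False)
    style_gallery_result = gr.Gallery(label="Style-based Recommendation",
                                      columns=2, interactive=False)
    selection_note = gr.Textbox(label="Selection", interactive=False)

    recommend_btn.click(recommender.infer, inputs=[query],
                        outputs=[gallery_result, style_gallery_result])
    gallery_result.select(recommender.item_associate, inputs=None,
                          outputs=[selection_note])
    style_gallery_result.select(recommender.style_associate, inputs=None,
                                outputs=[selection_note])

# demo_reco.launch()  # uncomment to try the sketch standalone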
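# ---------------------------------------------------------------------------
# Sketch of the visibility-toggling pattern behind init_openai_api_key above:
# one callback returns a gr.update(visible=...) per output component, so the
# GPT-dependent rows (export, recommend, sort_rec, ...) only appear once a key
# is submitted. The component set, the key check, and which groups are shown
# or hidden are simplified assumptions; only the names openai_api_key, export,
# recommend, module_key_input, notification_box and enable_chatGPT_button come
# from the patched code.
import gradio as gr


def init_openai_api_key(api_key):
    ok = bool(api_key and api_key.startswith("sk-"))  # assumed validity check
    status = "Key accepted." if ok else "Please enter a valid OpenAI API key."
    # One update per entry in outputs=[...], in the same order.
    return (
        gr.update(visible=ok),       # export row
        gr.update(visible=ok),       # recommendation panel
        gr.update(visible=not ok),   # key-input row stays visible on failure
        status,                      # notification textbox
    )


with gr.Blocks() as demo_keys:
    openai_api_key = gr.Textbox(label="OpenAI API key", type="password")
    notification_box = gr.Textbox(label="Status", interactive=False)
    with gr.Row(visible=False) as export:
        gr.File(label="Download Chat Log")
    with gr.Column(visible=False) as recommend:
        gr.Gallery(label="Object-based Recommendation", columns=2)
    with gr.Row() as module_key_input:
        enable_chatGPT_button = gr.Button("Submit key")

    openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
                          outputs=[export, recommend, module_key_input, notification_box])
    enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
                                outputs=[export, recommend, module_key_input, notification_box])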
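# ---------------------------------------------------------------------------
# Resulting entry point after the final hunk: the request queue is capped at
# 10 again (down from 40) and the REST API stays closed. create_ui() is
# stubbed out here only so the snippet stands alone; in app.py it is the
# function patched throughout this diff.
import gradio as gr


def create_ui():
    with gr.Blocks() as iface:
        gr.Markdown("stub UI")  # placeholder; app.py builds the full interface here
    return iface


if __name__ == '__main__':
    print("main")
    iface = create_ui()
    iface.queue(api_open=False, max_size=10)   # cap queued requests at 10, keep API closed
    iface.launch(server_name="0.0.0.0")        # bind to all network interfaces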