hyzhang00 committed on
Commit
54ec520
·
1 Parent(s): 1ff4f32
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. examples/examples_.DS_Store → .DS_Store +0 -0
  2. app.py +0 -0
  3. assets/UI.png +0 -3
  4. assets/caption_anything_logo.png +0 -0
  5. assets/demo1.jpg +0 -3
  6. assets/demo1.png +0 -3
  7. assets/demo1.svg +0 -0
  8. assets/demo2.png +0 -0
  9. assets/demo2.svg +0 -0
  10. assets/qingming.gif +0 -3
  11. assets/times_with_simsun.ttf +0 -3
  12. assets/title.png +0 -0
  13. assets/title.svg +0 -1
  14. chatbox.py → backend/chatbox.py +0 -0
  15. backend/gpt_service/__init__.py +4 -0
  16. backend/gpt_service/info_queries.py +39 -0
  17. backend/gpt_service/utils.py +75 -0
  18. backend/prompts/__init__.py +3 -0
  19. backend/prompts/generate_prompt.py +23 -0
  20. backend/prompts/prompt_templates.py +91 -0
  21. backend/recommendation/__init__.py +4 -0
  22. backend/recommendation/config.py +23 -0
  23. backend/recommendation/recommender.py +107 -0
  24. backend/texttospeech/tts.py +33 -0
  25. configs/instant-mesh-base.yaml +0 -22
  26. configs/instant-mesh-large-train.yaml +0 -67
  27. configs/instant-mesh-large.yaml +0 -22
  28. configs/instant-nerf-base.yaml +0 -21
  29. configs/instant-nerf-large-train.yaml +0 -65
  30. configs/instant-nerf-large.yaml +0 -21
  31. configs/zero123plus-finetune.yaml +0 -47
  32. examples/female.wav +0 -3
  33. examples/male.wav +0 -0
  34. recomendation_pic/1.8.jpg +0 -0
  35. recomendation_pic/1.9.jpg +0 -0
  36. recomendation_pic/2.8.jpg +0 -0
  37. recomendation_pic/2.9.png +0 -0
  38. recomendation_pic/3.8.png +0 -0
  39. recomendation_pic/3.9.png +0 -0
  40. recomendation_pic/basket-2.png +0 -0
  41. recomendation_pic/readme.md +0 -0
  42. test_images/1.The Ambassadors.jpg +0 -0
  43. test_images/2.Football Players.jpg +0 -0
  44. test_images/3-square.jpg +0 -3
  45. test_images/3.Along the River during the Qingming Festival.jpeg +0 -3
  46. test_images/MUS.png +0 -0
  47. test_images/Picture0.png +0 -0
  48. test_images/Picture1.png +0 -0
  49. test_images/Picture10.png +0 -0
  50. test_images/Picture2.png +0 -0
examples/examples_.DS_Store → .DS_Store RENAMED
Binary files a/examples/examples_.DS_Store and b/.DS_Store differ
 
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
assets/UI.png DELETED

Git LFS Details

  • SHA256: bce7f8b8b11832a98d85ecf7755274df5860d9b5eb35738dabbb2e585d70ddd4
  • Pointer size: 132 Bytes
  • Size of remote file: 2.64 MB
assets/caption_anything_logo.png DELETED
Binary file (150 kB)
 
assets/demo1.jpg DELETED

Git LFS Details

  • SHA256: 7a3bf5f8e4e8a79824f06916cdd41c94c23c5159abf3ecd5045732f27dd358f2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.87 MB
assets/demo1.png DELETED

Git LFS Details

  • SHA256: 2bd22e897705a8cebb3f1fc2ddf857eeeb1736b7b627cf8c24ed84c17728a4cc
  • Pointer size: 132 Bytes
  • Size of remote file: 1.79 MB
assets/demo1.svg DELETED
assets/demo2.png DELETED
Binary file (726 kB)
 
assets/demo2.svg DELETED
assets/qingming.gif DELETED

Git LFS Details

  • SHA256: dc052aad5ab86a9a0ac1483853f2370686add2a4b0a5088be86598bec01b533e
  • Pointer size: 132 Bytes
  • Size of remote file: 4.64 MB
assets/times_with_simsun.ttf DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:0b15a12dd4bba4a48885c279a1d16590b652773f02137a7e62ede3411970c59f
- size 11066612
assets/title.png DELETED
Binary file (40.8 kB)
 
assets/title.svg DELETED
chatbox.py → backend/chatbox.py RENAMED
File without changes
backend/gpt_service/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .utils import get_gpt_response
+ from .info_queries import get_artistinfo, get_yearinfo
+
+ __all__ = ['get_gpt_response', 'get_artistinfo', 'get_yearinfo']
backend/gpt_service/info_queries.py ADDED
@@ -0,0 +1,39 @@
+ import re
+ import emoji
+ from .utils import get_gpt_response
+
+ async def get_artistinfo(artist_name, api_key, state, language, autoplay, length, log_state, texttospeech_fn):
+     prompt = (
+         f"Provide a concise summary of about {length} words in {language} on the painter {artist_name}, "
+         "covering his biography, major works, artistic style, significant contributions to the art world, "
+         "and any major awards or recognitions he has received. Start your response with 'Artist Background: '."
+     )
+
+     res = get_gpt_response(api_key, None, prompt)
+     state = state + [(None, res)]
+     read_info = re.sub(r'[#[\]!*]', '', res)
+     read_info = emoji.replace_emoji(read_info, replace="")
+     log_state = log_state + [(res, None)]
+
+     if autoplay:
+         audio_output = await texttospeech_fn(read_info, language)
+         return state, state, audio_output, log_state
+     return state, state, None, log_state
+
+ async def get_yearinfo(year, api_key, state, language, autoplay, length, log_state, texttospeech_fn):
+     prompt = (
+         f"Provide a concise summary of about {length} words in {language} on the art historical period "
+         f"associated with the year {year}, covering its major characteristics, influential artists, "
+         "notable works, and its significance in the broader context of art history with 'History Background: '."
+     )
+
+     res = get_gpt_response(api_key, None, prompt)
+     log_state = log_state + [(res, None)]
+     state = state + [(None, res)]
+     read_info = re.sub(r'[#[\]!*]', '', res)
+     read_info = emoji.replace_emoji(read_info, replace="")
+
+     if autoplay:
+         audio_output = await texttospeech_fn(read_info, language)
+         return state, state, audio_output, log_state
+     return state, state, None, log_state
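For context, a minimal sketch of driving one of these coroutines outside the Gradio app; the key, states, and argument values are illustrative placeholders, and texttospeech comes from backend/texttospeech/tts.py below:

```python
import asyncio
from backend.gpt_service import get_artistinfo
from backend.texttospeech.tts import texttospeech

async def demo():
    # All values besides the artist name are illustrative placeholders.
    state, _, audio_html, log_state = await get_artistinfo(
        artist_name="Claude Monet",
        api_key="sk-...",  # hypothetical OpenAI API key
        state=[], language="English", autoplay=True,
        length=100, log_state=[], texttospeech_fn=texttospeech,
    )
    print(state[-1][1])  # the generated "Artist Background" text

asyncio.run(demo())
```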
backend/gpt_service/utils.py ADDED
@@ -0,0 +1,75 @@
+ import json
+ import requests
+ import base64
+
+
+ def encode_image(image_path):
+     with open(image_path, "rb") as image_file:
+         return base64.b64encode(image_file.read()).decode('utf-8')
+
+ def get_gpt_response(api_key, image_path, prompt, history=None):
+
+     headers = {
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {api_key}"
+     }
+
+     # Keep only the four most recent history entries.
+     if history:
+         if len(history) > 4:
+             history = history[-4:]
+     else:
+         history = []
+
+     messages = history[:]
+     base64_images = []
+
+     if image_path:
+         if isinstance(image_path, list):
+             for img in image_path:
+                 base64_images.append(encode_image(img))
+         else:
+             base64_images.append(encode_image(image_path))
+
+         # One image_url entry per encoded image; interpolating the whole
+         # list into a single data URL would produce an invalid request.
+         content = [{"type": "text", "text": prompt}]
+         for b64 in base64_images:
+             content.append({
+                 "type": "image_url",
+                 "image_url": {
+                     "url": f"data:image/jpeg;base64,{b64}"
+                 }
+             })
+         messages.append({"role": "user", "content": content})
+     else:
+         messages.append({
+             "role": "user",
+             "content": prompt
+         })
+
+     payload = {
+         "model": "gpt-4o",
+         "messages": messages,
+         "max_tokens": 600
+     }
+
+     # Send the request to the OpenAI API.
+     response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+     try:
+         result = response.json()
+         print("gpt result", result)
+         content = result['choices'][0]['message']['content']
+         # Strip a ```json fence if the model wrapped its output in one.
+         if content.startswith("```json"):
+             content = content[7:]
+         if content.endswith("```"):
+             content = content[:-3]
+         return content
+     except (KeyError, IndexError, json.JSONDecodeError) as e:
+         return json.dumps({"error": "Failed to parse model output", "details": str(e)})
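A short usage sketch of get_gpt_response; the key and image path are placeholders, not files from this repo:

```python
from backend.gpt_service.utils import get_gpt_response

api_key = "sk-..."  # hypothetical OpenAI API key

# Text-only call: `content` stays a plain string, so no image block is built.
answer = get_gpt_response(api_key, None, "Summarize Impressionism in one sentence.")

# Vision call: a path (or list of paths) is base64-encoded into image_url blocks.
caption = get_gpt_response(api_key, "painting.jpg", "Describe this painting.")
print(answer, caption, sep="\n")
```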
backend/prompts/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .generate_prompt import generate_prompt
+
+ __all__ = ['generate_prompt']
backend/prompts/generate_prompt.py ADDED
@@ -0,0 +1,23 @@
+ from .prompt_templates import PromptTemplates
+
+ def generate_prompt(focus_type, paragraph, length, sentiment, factuality, language, narrative):
+     mapped_value = PromptTemplates.FOCUS_MAP.get(focus_type, -1)
+     narrative_value = PromptTemplates.NARRATIVE_MAPPING[narrative]
+
+     controls = {
+         'length': length,
+         'sentiment': sentiment,
+         'factuality': factuality,
+         'language': language
+     }
+
+     if mapped_value != -1:
+         prompt = PromptTemplates.ANALYSIS_PROMPTS[narrative_value][mapped_value].format(
+             Wiki_caption=paragraph,
+             length=controls['length'],
+             sentiment=controls['sentiment'],
+             language=controls['language']
+         )
+     else:
+         prompt = "Invalid focus type."
+     return prompt
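To illustrate the lookup, a small sketch using keys from FOCUS_MAP and NARRATIVE_MAPPING (the caption text is made up):

```python
from backend.prompts import generate_prompt

prompt = generate_prompt(
    focus_type="D+Analysis",   # FOCUS_MAP -> 1
    paragraph="A seated woman in blue, painted around 1875.",  # used as Wiki_caption
    length=80,
    sentiment="neutral",
    factuality="factual",      # accepted but unused by the current templates
    language="English",
    narrative="Narrator",      # NARRATIVE_MAPPING -> 0
)
print(prompt)
```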
backend/prompts/prompt_templates.py ADDED
@@ -0,0 +1,91 @@
+ class PromptTemplates:
+     FOCUS_MAP = {
+         "Describe": 0,
+         "D+Analysis": 1,
+         "DA+Interprete": 2,
+         "Judge": 3
+     }
+
+     NARRATIVE_MAPPING = {
+         "Narrator": 0,
+         "Artist": 1,
+         "In-Situ": 2
+     }
+
+     ANALYSIS_PROMPTS = [
+         [
+             'Wiki_caption: {Wiki_caption}. Help me understand what the selected object is about, and list one fact (describing the selected object, without analysis) as a markdown outline with appropriate emojis, according to the image and the wiki caption. Each point listed is to be in {language}, with a response length of about {length} words.',
+             'Wiki_caption: {Wiki_caption}. Help me understand what the selected object is about, and list one fact and one analysis as a markdown outline with appropriate emojis, according to the image and the wiki caption. Each point listed is to be in {language}, with a response length of about {length} words.',
+             'Wiki_caption: {Wiki_caption}. Help me understand what the selected object is about, and list one fact, one analysis, and one interpretation as a markdown outline with appropriate emojis, according to the image and the wiki caption. Each point listed is to be in {language}, with a response length of about {length} words.',
+             'Wiki_caption: {Wiki_caption}. Help me understand what the selected object is about, and list one judgement of the object and one judgement of the whole artwork (how successful do you think the artist was?) as a markdown outline with appropriate emojis, according to the image and the wiki caption. Each point listed is to be in {language}, with a response length of about {length} words.'
+         ],
+         [
+             "When generating the answer, tell others that you are the creator of this painting and write in the creator's tone and manner. Help me understand what the selected object is about, and list one fact (describing the selected object, without analysis) as a markdown outline with appropriate emojis, according to the image and {Wiki_caption}. Generate the points as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.",
+             "When generating the answer, tell others that you are the creator of this painting and write in the creator's tone and manner. Help me understand what the selected object is about, and list one fact and one analysis from an art-appreciation perspective as a markdown outline with appropriate emojis, according to the image and {Wiki_caption}. Generate the points as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.",
+             "When generating the answer, tell others that you are the creator of this painting and write in the creator's tone and manner. Help me understand what the selected object is about, and list one fact, one analysis, and one interpretation from an art-appreciation perspective as a markdown outline with appropriate emojis, according to the image and {Wiki_caption}. Generate the points as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.",
+             "When generating the answer, tell others that you are one of the creators of these paintings and write in the creator's tone and manner. According to the image and wiki_caption {Wiki_caption}, help me understand what the selected object is about, and list one judgement of the object and one judgement of the whole artwork (how successful do you think the artist was?) as a markdown outline with appropriate emojis. Generate the points as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.",
+         ],
+         [
+             'When generating answers, tell people that you are the selected object itself and write in the tone and manner of that object or person. Help me understand what the selected object is about, and list one fact (describing the selected object, without analysis) as a markdown outline with appropriate emojis, according to the image and {Wiki_caption}. Generate the points as if you are the object in this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.',
+             'When generating answers, tell people that you are the selected object itself and write in the tone and manner of that object or person. Help me understand what the selected object is about, and list one fact and one analysis from an art-appreciation perspective as a markdown outline with appropriate emojis, according to the image and {Wiki_caption}. Generate the points as if you are the object in this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.',
+             'When generating answers, tell people that you are the selected object itself and write in the tone and manner of that object or person. Help me understand what the selected object is about, and list one fact, one analysis, and one interpretation from an art-appreciation perspective as a markdown outline with appropriate emojis, according to the image and {Wiki_caption}. Generate the points as if you are the object in this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.',
+             'When generating answers, tell people that you are the selected object itself and write in the tone and manner of that object or person. According to the image and wiki_caption {Wiki_caption}, help me understand what the selected object is about, and list one judgement of the object and one judgement of the whole artwork (how successful do you think the artist was?) as a markdown outline with appropriate emojis. Generate the points as if you are the object in this painting and start every sentence with I. Each point listed is to be in {language}, with a response length of about {length} words.',
+         ]
+     ]
+
+     RECOMMENDATION_PROMPTS = [
+         [
+             '''
+             First identify the object in the first painting and store it as the parameter {{object}}; you do not need to tell me, the following will use the parameter. Write the recommendation reason as a markdown outline with appropriate emojis describing what you see in the painting:
+             Recommendation reason: {{Recommendation based on the {{object}} in the painting you saw earlier. Detailed analysis: based on the recommendation reason and the relationship between the two paintings, explain why you recommend the other painting. Please generate three points.}}
+             Each bullet point should be in {language}, with a response length of about {length} words.
+             ''',
+             '''
+             When generating answers, tell people that you are the creator of the painting they were looking at earlier, and write in that creator's tone and manner.
+
+             First identify the object in the first painting and store it as the parameter {{object}}; you do not need to tell me, the following will use the parameter. Write the recommendation reason as a markdown outline with appropriate emojis describing what you see in the painting:
+
+             Recommendation reason: {{I'm the creator of that painting you saw earlier. I'm an artist, and I'm recommending this painting because the {{object}} I drew also appears in the painting you're looking at.}} Detailed analysis: based on the recommendation reason and the relationship between the two paintings, explain why you recommend the other painting. Generate the three points as if you are the creator of the earlier painting and start every sentence with I.
+
+             Each bullet point should be in {language}, with a response length of about {length} words.
+             ''',
+             '''
+             When generating answers, tell people that you are the object that was selected in the painting, and write in that object's tone and manner.
+
+             First identify the object in the first painting and store it as the parameter {{object}}; you do not need to tell me, the following will use the parameter. Write the recommendation reason as a markdown outline with appropriate emojis describing what you see in the painting:
+
+             Recommendation reason: {{I'm the {{object}} in the painting you were looking at earlier, and I'm recommending this painting because I also appear in the one you're looking at.}} Detailed analysis: based on the recommendation reason and the relationship between the two paintings, explain why you recommend the other painting. Generate the three points as if you are the object in this painting and start every sentence with I.
+
+             Each bullet point should be in {language}, with a response length of about {length} words.
+             '''
+         ],
+         [
+             '''
+             First identify the name of the first painting and store it as the parameter {{name}}; you do not need to tell me, the following will use the parameter. Write the recommendation reason as a markdown outline with appropriate emojis describing what you see in the painting:
+             Recommendation reason: {{Recommendation based on the painting {{name}}. Detailed analysis: based on the recommendation reason and the relationship between the two paintings, explain why you recommend the other painting. Please generate three points.}}
+             Each bullet point should be in {language}, with a response length of about {length} words.
+             ''',
+             '''
+             When generating answers, tell people that you are the creator of the painting they were looking at earlier, and write in that creator's tone and manner.
+
+             First identify the creator of the first painting and store it as the parameter {artist}; you do not need to tell me, the following will use the parameter. Write the recommendation reason as a markdown outline with appropriate emojis describing what you see in the painting:
+
+             Recommendation reason: {{I'm the creator of that painting you saw earlier, {artist}. I'm an artist, and I'm recommending this painting because it is similar to the one of mine you just saw.}} Detailed analysis: based on the recommendation reason and the relationship between the two paintings, explain why you recommend the other painting. Generate the three points as if you are the creator of the earlier painting and start every sentence with I.
+
+             Each bullet point should be in {language}, with a response length of about {length} words.
+             ''',
+             '''
+             When generating answers, tell people that you are the painting the viewer was looking at earlier, and write in that painting's tone and manner.
+
+             First identify the name of the first painting and store it as the parameter {{name}}; you do not need to tell me, the following will use the parameter. Write the recommendation reason as a markdown outline with appropriate emojis describing what you see in the painting:
+
+             Recommendation reason: {{I'm the painting {{name}} you were looking at earlier, and I'm recommending this painting because I'm similar to the one you're looking at.}} Detailed analysis: based on the recommendation reason and the relationship between the two paintings, explain why you recommend the other painting. Generate the three points as if you are the painting the viewer saw earlier and start every sentence with I.
+
+             Each bullet point should be in {language}, with a response length of about {length} words.
+             '''
+         ]
+     ]
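The two mappings above index ANALYSIS_PROMPTS as a 3x4 grid (narrative x focus). A direct-indexing sketch, equivalent to what generate_prompt does:

```python
from backend.prompts.prompt_templates import PromptTemplates

narrative = PromptTemplates.NARRATIVE_MAPPING["In-Situ"]  # 2
focus = PromptTemplates.FOCUS_MAP["Judge"]                # 3
template = PromptTemplates.ANALYSIS_PROMPTS[narrative][focus]
print(template.format(Wiki_caption="A basket of fruit.", length=60, language="English"))
```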
backend/recommendation/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .config import RecommendationConfig
+ from .recommender import ImageRecommender
+
+ __all__ = ['RecommendationConfig', 'ImageRecommender']
backend/recommendation/config.py ADDED
@@ -0,0 +1,23 @@
+ import torch
+ from transformers import AutoProcessor, SiglipModel
+ from huggingface_hub import hf_hub_download
+ import faiss
+ import pandas as pd
+
+ class RecommendationConfig:
+     def __init__(self):
+         # Fetch the prebuilt FAISS index and image metadata for the WikiArt subset.
+         hf_hub_download("merve/siglip-faiss-wikiart", "siglip_10k_latest.index", local_dir="./")
+         hf_hub_download("merve/siglip-faiss-wikiart", "wikiart_10k_latest.csv", local_dir="./")
+
+         self.index = faiss.read_index("./siglip_10k_latest.index")
+         self.df = pd.read_csv("./wikiart_10k_latest.csv")
+
+         self.device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
+         self.processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
+         self.model = SiglipModel.from_pretrained("google/siglip-base-patch16-224").to(self.device)
+
+     def get_messages(self, language):
+         return {
+             "English": "🖼️ Please refer to the section below to see the recommended results.",
+             "Chinese": "🖼️ 请到下方查看推荐结果。"
+         }[language]
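Note that constructing the config eagerly downloads the FAISS index, the CSV metadata, and the SigLIP weights, so even a minimal sketch like this performs real network I/O on first run:

```python
from backend.recommendation import RecommendationConfig

config = RecommendationConfig()   # downloads index, CSV, and model weights
print(config.index.ntotal)        # number of indexed WikiArt images
print(config.get_messages("English"))
```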
backend/recommendation/recommender.py ADDED
@@ -0,0 +1,107 @@
+ import torch
+ import numpy as np
+ import faiss
+ from PIL import Image
+ from io import BytesIO
+ import requests
+ import spaces
+ import gradio as gr
+ import re
+ import emoji
+ from ..prompts.prompt_templates import PromptTemplates
+
+ class ImageRecommender:
+     def __init__(self, config):
+         self.config = config
+
+     def read_image_from_url(self, url):
+         response = requests.get(url)
+         img = Image.open(BytesIO(response.content)).convert("RGB")
+         return img
+
+     def extract_features_siglip(self, image):
+         with torch.no_grad():
+             inputs = self.config.processor(images=image, return_tensors="pt").to(self.config.device)
+             image_features = self.config.model.get_image_features(**inputs)
+             return image_features
+
+     def process_image(self, image_path, num_results=2):
+         input_image = Image.open(image_path).convert("RGB")
+         input_features = self.extract_features_siglip(input_image)
+         input_features = input_features.detach().cpu().numpy()
+         input_features = np.float32(input_features)
+         faiss.normalize_L2(input_features)
+
+         distances, indices = self.config.index.search(input_features, num_results)
+         gallery_output = []
+
+         for i, v in enumerate(indices[0]):
+             sim = -distances[0][i]  # similarity score (currently unused)
+             image_url = self.config.df.iloc[v]["Link"]
+             img_retrieved = self.read_image_from_url(image_url)
+             gallery_output.append(img_retrieved)
+
+         return gallery_output
+
+     @spaces.GPU
+     def infer(self, crop_image_path, full_image_path, state, language, task_type=None):
+         style_gallery_output = []
+         item_gallery_output = []
+
+         if crop_image_path:
+             item_gallery_output = self.process_image(crop_image_path, 2)
+             style_gallery_output = self.process_image(full_image_path, 2)
+         else:
+             style_gallery_output = self.process_image(full_image_path, 4)
+
+         msg = self.config.get_messages(language)
+         state += [(None, msg)]
+
+         return item_gallery_output, style_gallery_output, state, state
+
+     async def item_associate(self, new_crop, openai_api_key, language, autoplay, length,
+                              log_state, sort_score, narrative, state, evt: gr.SelectData):
+         rec_path = evt._data['value']['image']['path']
+         return (
+             state,
+             state,
+             None,
+             log_state,
+             None,
+             gr.update(value=[]),
+             rec_path,
+             rec_path,
+             "Item"
+         )
+
+     async def style_associate(self, image_path, openai_api_key, language, autoplay,
+                               length, log_state, sort_score, narrative, state, artist,
+                               evt: gr.SelectData):
+         rec_path = evt._data['value']['image']['path']
+         return (
+             state,
+             state,
+             None,
+             log_state,
+             None,
+             gr.update(value=[]),
+             rec_path,
+             rec_path,
+             "Style"
+         )
+
+     def generate_recommendation_prompt(self, recommend_type, narrative, language, length, artist=None):
+
+         narrative_value = PromptTemplates.NARRATIVE_MAPPING[narrative]
+         prompt_type = 0 if recommend_type == "Item" else 1
+
+         if narrative_value == 1 and recommend_type == "Style":
+             return PromptTemplates.RECOMMENDATION_PROMPTS[prompt_type][narrative_value].format(
+                 language=language,
+                 length=length,
+                 artist=artist[8:] if artist else ""  # drop the leading 8-character label from the artist string
+             )
+         else:
+             return PromptTemplates.RECOMMENDATION_PROMPTS[prompt_type][narrative_value].format(
+                 language=language,
+                 length=length
+             )
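A minimal sketch of a style-only recommendation; the image path is a hypothetical local file, and @spaces.GPU is typically a no-op outside a Hugging Face Spaces runtime:

```python
from backend.recommendation import RecommendationConfig, ImageRecommender

recommender = ImageRecommender(RecommendationConfig())

# No cropped region, so infer returns four style-based matches for the full image.
items, styles, state, _ = recommender.infer(
    crop_image_path=None,
    full_image_path="painting.jpg",  # hypothetical local file
    state=[],
    language="English",
)
print(len(items), len(styles))  # 0 4
```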
backend/texttospeech/tts.py ADDED
@@ -0,0 +1,33 @@
+ import edge_tts
+ import base64
+ from io import BytesIO
+
+ filtered_language_dict = {
+     'English': {'female': 'en-US-JennyNeural', 'male': 'en-US-GuyNeural'},
+     'Chinese': {'female': 'zh-CN-XiaoxiaoNeural', 'male': 'zh-CN-YunxiNeural'},
+     'French': {'female': 'fr-FR-DeniseNeural', 'male': 'fr-FR-HenriNeural'},
+     'Spanish': {'female': 'es-MX-DaliaNeural', 'male': 'es-MX-JorgeNeural'},
+     'Arabic': {'female': 'ar-SA-ZariyahNeural', 'male': 'ar-SA-HamedNeural'},
+     'Portuguese': {'female': 'pt-BR-FranciscaNeural', 'male': 'pt-BR-AntonioNeural'},
+     'Cantonese': {'female': 'zh-HK-HiuGaaiNeural', 'male': 'zh-HK-WanLungNeural'}
+ }
+
+ async def texttospeech(text, language, gender='female'):
+     try:
+         voice = filtered_language_dict[language][gender]
+         communicate = edge_tts.Communicate(text=text, voice=voice, rate="+25%")
+         file_path = "output.wav"
+         await communicate.save(file_path)
+
+         with open(file_path, "rb") as audio_file:
+             audio_bytes = BytesIO(audio_file.read())
+         audio = base64.b64encode(audio_bytes.read()).decode("utf-8")
+         print("TTS processing completed.")
+
+         audio_style = 'style="width:210px;"'
+         audio_player = f'<audio src="data:audio/wav;base64,{audio}" controls autoplay {audio_style}></audio>'
+         return audio_player
+
+     except Exception as e:
+         print(f"Error in texttospeech: {e}")
+         return None
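Because texttospeech is a coroutine that returns an HTML audio snippet (or None on failure), a standalone sketch needs an event loop:

```python
import asyncio
from backend.texttospeech.tts import texttospeech

player_html = asyncio.run(texttospeech("Welcome to the gallery.", "English", gender="male"))
if player_html:
    print(player_html[:60], "...")  # base64 data-URL audio player for the chat UI
```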
configs/instant-mesh-base.yaml DELETED
@@ -1,22 +0,0 @@
- model_config:
-   target: src.models.lrm_mesh.InstantMesh
-   params:
-     encoder_feat_dim: 768
-     encoder_freeze: false
-     encoder_model_name: facebook/dino-vitb16
-     transformer_dim: 1024
-     transformer_layers: 12
-     transformer_heads: 16
-     triplane_low_res: 32
-     triplane_high_res: 64
-     triplane_dim: 40
-     rendering_samples_per_ray: 96
-     grid_res: 128
-     grid_scale: 2.1
-
-
- infer_config:
-   unet_path: ckpts/diffusion_pytorch_model.bin
-   model_path: ckpts/instant_mesh_base.ckpt
-   texture_resolution: 1024
-   render_resolution: 512
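These deleted configs all follow the common `target`/`params` convention: `target` names a dotted import path and `params` are passed as constructor keyword arguments. A hypothetical loader sketch of that pattern (InstantMesh ships its own equivalent utility; omegaconf is assumed):

```python
import importlib
from omegaconf import OmegaConf

def instantiate_from_config(cfg):
    # cfg["target"] is "package.module.ClassName"; params become constructor kwargs.
    module_name, cls_name = cfg["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), cls_name)
    return cls(**cfg.get("params", {}))

cfg = OmegaConf.load("configs/instant-mesh-base.yaml")
model = instantiate_from_config(cfg.model_config)
```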
configs/instant-mesh-large-train.yaml DELETED
@@ -1,67 +0,0 @@
- model:
-   base_learning_rate: 4.0e-05
-   target: src.model_mesh.MVRecon
-   params:
-     init_ckpt: logs/instant-nerf-large-train/checkpoints/last.ckpt
-     input_size: 320
-     render_size: 512
-
-     lrm_generator_config:
-       target: src.models.lrm_mesh.InstantMesh
-       params:
-         encoder_feat_dim: 768
-         encoder_freeze: false
-         encoder_model_name: facebook/dino-vitb16
-         transformer_dim: 1024
-         transformer_layers: 16
-         transformer_heads: 16
-         triplane_low_res: 32
-         triplane_high_res: 64
-         triplane_dim: 80
-         rendering_samples_per_ray: 128
-         grid_res: 128
-         grid_scale: 2.1
-
-
- data:
-   target: src.data.objaverse.DataModuleFromConfig
-   params:
-     batch_size: 2
-     num_workers: 8
-     train:
-       target: src.data.objaverse.ObjaverseData
-       params:
-         root_dir: data/objaverse
-         meta_fname: filtered_obj_name.json
-         input_image_dir: rendering_random_32views
-         target_image_dir: rendering_random_32views
-         input_view_num: 6
-         target_view_num: 4
-         total_view_n: 32
-         fov: 50
-         camera_rotation: true
-         validation: false
-     validation:
-       target: src.data.objaverse.ValidationData
-       params:
-         root_dir: data/valid_samples
-         input_view_num: 6
-         input_image_size: 320
-         fov: 30
-
-
- lightning:
-   modelcheckpoint:
-     params:
-       every_n_train_steps: 2000
-       save_top_k: -1
-       save_last: true
-   callbacks: {}
-
-   trainer:
-     benchmark: true
-     max_epochs: -1
-     val_check_interval: 1000
-     num_sanity_val_steps: 0
-     accumulate_grad_batches: 1
-     check_val_every_n_epoch: null  # if this is not set, validation does not run
configs/instant-mesh-large.yaml DELETED
@@ -1,22 +0,0 @@
- model_config:
-   target: src.models.lrm_mesh.InstantMesh
-   params:
-     encoder_feat_dim: 768
-     encoder_freeze: false
-     encoder_model_name: facebook/dino-vitb16
-     transformer_dim: 1024
-     transformer_layers: 16
-     transformer_heads: 16
-     triplane_low_res: 32
-     triplane_high_res: 64
-     triplane_dim: 80
-     rendering_samples_per_ray: 128
-     grid_res: 128
-     grid_scale: 2.1
-
-
- infer_config:
-   unet_path: ckpts/diffusion_pytorch_model.bin
-   model_path: ckpts/instant_mesh_large.ckpt
-   texture_resolution: 1024
-   render_resolution: 512
configs/instant-nerf-base.yaml DELETED
@@ -1,21 +0,0 @@
- model_config:
-   target: src.models.lrm.InstantNeRF
-   params:
-     encoder_feat_dim: 768
-     encoder_freeze: false
-     encoder_model_name: facebook/dino-vitb16
-     transformer_dim: 1024
-     transformer_layers: 12
-     transformer_heads: 16
-     triplane_low_res: 32
-     triplane_high_res: 64
-     triplane_dim: 40
-     rendering_samples_per_ray: 96
-
-
- infer_config:
-   unet_path: ckpts/diffusion_pytorch_model.bin
-   model_path: ckpts/instant_nerf_base.ckpt
-   mesh_threshold: 10.0
-   mesh_resolution: 256
-   render_resolution: 384
configs/instant-nerf-large-train.yaml DELETED
@@ -1,65 +0,0 @@
- model:
-   base_learning_rate: 4.0e-04
-   target: src.model.MVRecon
-   params:
-     input_size: 320
-     render_size: 192
-
-     lrm_generator_config:
-       target: src.models.lrm.InstantNeRF
-       params:
-         encoder_feat_dim: 768
-         encoder_freeze: false
-         encoder_model_name: facebook/dino-vitb16
-         transformer_dim: 1024
-         transformer_layers: 16
-         transformer_heads: 16
-         triplane_low_res: 32
-         triplane_high_res: 64
-         triplane_dim: 80
-         rendering_samples_per_ray: 128
-
-
- data:
-   target: src.data.objaverse.DataModuleFromConfig
-   params:
-     batch_size: 2
-     num_workers: 8
-     train:
-       target: src.data.objaverse.ObjaverseData
-       params:
-         root_dir: data/objaverse
-         meta_fname: filtered_obj_name.json
-         input_image_dir: rendering_random_32views
-         target_image_dir: rendering_random_32views
-         input_view_num: 6
-         target_view_num: 4
-         total_view_n: 32
-         fov: 50
-         camera_rotation: true
-         validation: false
-     validation:
-       target: src.data.objaverse.ValidationData
-       params:
-         root_dir: data/valid_samples
-         input_view_num: 6
-         input_image_size: 320
-         fov: 30
-
-
- lightning:
-   modelcheckpoint:
-     params:
-       every_n_train_steps: 1000
-       save_top_k: -1
-       save_last: true
-   callbacks: {}
-
-   trainer:
-     benchmark: true
-     max_epochs: -1
-     gradient_clip_val: 1.0
-     val_check_interval: 1000
-     num_sanity_val_steps: 0
-     accumulate_grad_batches: 1
-     check_val_every_n_epoch: null  # if this is not set, validation does not run
configs/instant-nerf-large.yaml DELETED
@@ -1,21 +0,0 @@
- model_config:
-   target: src.models.lrm.InstantNeRF
-   params:
-     encoder_feat_dim: 768
-     encoder_freeze: false
-     encoder_model_name: facebook/dino-vitb16
-     transformer_dim: 1024
-     transformer_layers: 16
-     transformer_heads: 16
-     triplane_low_res: 32
-     triplane_high_res: 64
-     triplane_dim: 80
-     rendering_samples_per_ray: 128
-
-
- infer_config:
-   unet_path: ckpts/diffusion_pytorch_model.bin
-   model_path: ckpts/instant_nerf_large.ckpt
-   mesh_threshold: 10.0
-   mesh_resolution: 256
-   render_resolution: 384
configs/zero123plus-finetune.yaml DELETED
@@ -1,47 +0,0 @@
- model:
-   base_learning_rate: 1.0e-05
-   target: zero123plus.model.MVDiffusion
-   params:
-     drop_cond_prob: 0.1
-
-     stable_diffusion_config:
-       pretrained_model_name_or_path: sudo-ai/zero123plus-v1.2
-       custom_pipeline: ./zero123plus
-
- data:
-   target: src.data.objaverse_zero123plus.DataModuleFromConfig
-   params:
-     batch_size: 6
-     num_workers: 8
-     train:
-       target: src.data.objaverse_zero123plus.ObjaverseData
-       params:
-         root_dir: data/objaverse
-         meta_fname: lvis-annotations.json
-         image_dir: rendering_zero123plus
-         validation: false
-     validation:
-       target: src.data.objaverse_zero123plus.ObjaverseData
-       params:
-         root_dir: data/objaverse
-         meta_fname: lvis-annotations.json
-         image_dir: rendering_zero123plus
-         validation: true
-
-
- lightning:
-   modelcheckpoint:
-     params:
-       every_n_train_steps: 1000
-       save_top_k: -1
-       save_last: true
-   callbacks: {}
-
-   trainer:
-     benchmark: true
-     max_epochs: -1
-     gradient_clip_val: 1.0
-     val_check_interval: 1000
-     num_sanity_val_steps: 0
-     accumulate_grad_batches: 1
-     check_val_every_n_epoch: null  # if this is not set, validation does not run
examples/female.wav DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
- size 1002030
examples/male.wav DELETED
Binary file (762 kB)
 
recomendation_pic/1.8.jpg DELETED
Binary file (99 kB)
 
recomendation_pic/1.9.jpg DELETED
Binary file (59.1 kB)
 
recomendation_pic/2.8.jpg DELETED
Binary file (71.6 kB)
 
recomendation_pic/2.9.png DELETED
Binary file (298 kB)
 
recomendation_pic/3.8.png DELETED
Binary file (439 kB)
 
recomendation_pic/3.9.png DELETED
Binary file (739 kB)
 
recomendation_pic/basket-2.png DELETED
Binary file (449 kB)
 
recomendation_pic/readme.md DELETED
File without changes
test_images/1.The Ambassadors.jpg DELETED
Binary file (78 kB)
 
test_images/2.Football Players.jpg DELETED
Binary file (86.1 kB)
 
test_images/3-square.jpg DELETED

Git LFS Details

  • SHA256: e2a8f2e93e275b853d47803136cf8a8dc10f62001779a8d903ceb9c3678cc481
  • Pointer size: 132 Bytes
  • Size of remote file: 1.06 MB
test_images/3.Along the River during the Qingming Festival.jpeg DELETED

Git LFS Details

  • SHA256: 3fc255019acfe629f0838ec225028f32f38b71ebd01a2abcaa8e261eae48a521
  • Pointer size: 132 Bytes
  • Size of remote file: 1.17 MB
test_images/MUS.png DELETED
Binary file (471 kB)
 
test_images/Picture0.png DELETED
Binary file (399 kB)
 
test_images/Picture1.png DELETED
Binary file (452 kB)
 
test_images/Picture10.png DELETED
Binary file (268 kB)
 
test_images/Picture2.png DELETED
Binary file (293 kB)