Spaces:

SunderAli17
/

Image_captioning_Phi_Vision

Running on Zero

App Files Files Community

SunderAli17 committed on Aug 28

Commit

e337c90

•

1 Parent(s): 00d2a60

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -14

app.py CHANGED Viewed

@@ -15,21 +15,29 @@ processors = {
     "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
 }
-DESCRIPTION = "[Phi-3.5-vision Demo](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)"
 kwargs = {}
 kwargs['torch_dtype'] = torch.bfloat16
-user_prompt = '<|user|>\n'
-assistant_prompt = '<|assistant|>\n'
-prompt_suffix = "<|end|>\n"
 @spaces.GPU
 def run_example(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instruct"):
     model = models[model_id]
     processor = processors[model_id]
-    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
     image = Image.fromarray(image).convert("RGB")
     inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
@@ -43,16 +51,20 @@ def run_example(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instr
                                     clean_up_tokenization_spaces=False)[0]
     return response
-css = """
-  #output {
-    height: 500px;
-    overflow: auto;
-    border: 1px solid #ccc;
-  }
 """
-with gr.Blocks(css=css) as demo:
-    gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Phi-3.5 Input"):
         with gr.Row():
             with gr.Column():

     "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
 }
+MARKDOWN = """
+This demo utilizes <a href="https://huggingface.co/microsoft/Phi-3.5-vision-instruct">Phi-3.5-Vision Instruct</a> by @Microsoft.
+Try out with different images and generate captions. Do provide your feedback.
+Model Card is acquired from <a href="https://huggingface.co/microsoft/Phi-3.5-vision-instruct"> Microsoft's Phi Vision Instruct</a>
+**Demo by [Sunder Ali Khowaja](https://sander-ali.github.io) - [X](https://x.com/SunderAKhowaja) -[Github](https://github.com/sander-ali) -[Hugging Face](https://huggingface.co/SunderAli17)**
+"""
 kwargs = {}
 kwargs['torch_dtype'] = torch.bfloat16
+promptu = '<|user|>\n'
+prompta = '<|assistant|>\n'
+prompts = "<|end|>\n"
 @spaces.GPU
 def run_example(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instruct"):
     model = models[model_id]
     processor = processors[model_id]
+    prompt = f"{promptu}<|image_1|>\n{text_input}{prompts}{prompta}"
     image = Image.fromarray(image).convert("RGB")
     inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
                                     clean_up_tokenization_spaces=False)[0]
     return response
+theme = gr.themes.Soft(
+    font=[gr.themes.GoogleFont('Pacifico'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
+)
+js_func = """
+function refresh() {
+    const url = new URL(window.location);
+    if (url.searchParams.get('__theme') !== 'dark') {
+        url.searchParams.set('__theme', 'dark');
+        window.location.href = url.href;
+    }
+}
 """
+with gr.Blocks(js=js_func, theme=theme) as demo:
+    gr.Markdown(MARKDOWN)
     with gr.Tab(label="Phi-3.5 Input"):
         with gr.Row():
             with gr.Column():