ndurner committed on
Commit
d2d66c1
·
1 Parent(s): a30711d

new style chatbot, PDF support (taken from OAI chatbot)

Browse files
Files changed (3) hide show
  1. app.py +8 -77
  2. llm.py +100 -17
  3. requirements.txt +3 -2
app.py CHANGED
@@ -3,49 +3,12 @@ import json
3
  import os
4
  import boto3
5
 
6
- from doc2json import process_docx
7
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
8
- from llm import LLM, log_to_console, image_embed_prefix
9
  from botocore.config import Config
10
 
11
  dump_controls = False
12
 
13
- def add_text(history, text):
14
- if text:
15
- history = history + [(text, None)]
16
- return history, gr.Textbox(value="", interactive=False)
17
-
18
-
19
- def add_file(history, file):
20
- if file.name.endswith(".docx"):
21
- content = process_docx(file.name)
22
- else:
23
- with open(file.name, mode="rb") as f:
24
- content = f.read()
25
-
26
- if isinstance(content, bytes):
27
- content = content.decode('utf-8', 'replace')
28
- else:
29
- content = str(content)
30
-
31
- fn = os.path.basename(file.name)
32
- history = history + [(f'```{fn}\n{content}\n```', None)]
33
-
34
- return history
35
-
36
- def add_img(history, files):
37
- for file in files:
38
- if log_to_console:
39
- print(f"add_img {file.name}")
40
- history = history + [(image_embed_prefix + file.name, None)]
41
-
42
- gr.Info(f"Image added as {file.name}")
43
-
44
- return history
45
-
46
- def submit_text(txt_value):
47
- return add_text([chatbot, txt_value], [chatbot, txt_value])
48
-
49
  def undo(history):
50
  history.pop()
51
  return history
@@ -92,14 +55,12 @@ def bot(message, history, aws_access, aws_secret, aws_token, system_prompt, temp
92
  response = br.invoke_model(body=body, modelId=f"{model}",
93
  accept="application/json", contentType="application/json")
94
  response_body = json.loads(response.get('body').read())
95
- br_result = llm.read_response(response_body)
96
-
97
- history[-1][1] = br_result
98
 
99
  except Exception as e:
100
  raise gr.Error(f"Error: {str(e)}")
101
 
102
- return "", history
103
 
104
  def import_history(history, file):
105
  with open(file.name, mode="rb") as f:
@@ -186,34 +147,11 @@ with gr.Blocks() as demo:
186
  dl_settings_button.click(None, controls, js=generate_download_settings_js("amz_chat_settings.bin", control_ids))
187
  ul_settings_button.click(None, None, None, js=generate_upload_settings_js(control_ids))
188
 
189
- chatbot = gr.Chatbot(
190
- [],
191
- elem_id="chatbot",
192
- show_copy_button=True,
193
- height=350
194
- )
195
-
196
- with gr.Row():
197
- txt = gr.TextArea(
198
- scale=4,
199
- show_label=False,
200
- placeholder="Enter text and press enter, or upload a file",
201
- container=False,
202
- lines=3,
203
- )
204
- submit_btn = gr.Button("🚀 Send", scale=0)
205
- submit_click = submit_btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
206
- bot, [txt, chatbot, aws_access, aws_secret, aws_token, system_prompt, temp, max_tokens, model, region], [txt, chatbot],
207
- )
208
- submit_click.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
209
-
210
- with gr.Row():
211
- btn = gr.UploadButton("📁 Upload", size="sm")
212
- img_btn = gr.UploadButton("🖼️ Upload", size="sm", file_count="multiple", file_types=["image"])
213
- undo_btn = gr.Button("↩️ Undo")
214
- undo_btn.click(undo, inputs=[chatbot], outputs=[chatbot])
215
-
216
- clear = gr.ClearButton(chatbot, value="🗑️ Clear")
217
 
218
  if dump_controls:
219
  with gr.Row():
@@ -273,11 +211,4 @@ with gr.Blocks() as demo:
273
  """)
274
  import_button.upload(import_history, inputs=[chatbot, import_button], outputs=[chatbot, system_prompt])
275
 
276
- txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
277
- bot, [txt, chatbot, aws_access, aws_secret, aws_token, system_prompt, temp, max_tokens, model, region], [txt, chatbot],
278
- )
279
- txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
280
- file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False, postprocess=False)
281
- img_msg = img_btn.upload(add_img, [chatbot, img_btn], [chatbot], queue=False, postprocess=False)
282
-
283
  demo.queue().launch()
 
3
  import os
4
  import boto3
5
 
 
6
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
7
+ from llm import LLM, log_to_console
8
  from botocore.config import Config
9
 
10
  dump_controls = False
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def undo(history):
13
  history.pop()
14
  return history
 
55
  response = br.invoke_model(body=body, modelId=f"{model}",
56
  accept="application/json", contentType="application/json")
57
  response_body = json.loads(response.get('body').read())
58
+ result = llm.read_response(response_body)
 
 
59
 
60
  except Exception as e:
61
  raise gr.Error(f"Error: {str(e)}")
62
 
63
+ return result
64
 
65
  def import_history(history, file):
66
  with open(file.name, mode="rb") as f:
 
147
  dl_settings_button.click(None, controls, js=generate_download_settings_js("amz_chat_settings.bin", control_ids))
148
  ul_settings_button.click(None, None, None, js=generate_upload_settings_js(control_ids))
149
 
150
+ chat = gr.ChatInterface(fn=bot, multimodal=True, additional_inputs=controls, retry_btn = None, autofocus = False)
151
+ chat.textbox.file_count = "multiple"
152
+ chatbot = chat.chatbot
153
+ chatbot.show_copy_button = True
154
+ chatbot.height = 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
  if dump_controls:
157
  with gr.Row():
 
211
  """)
212
  import_button.upload(import_history, inputs=[chatbot, import_button], outputs=[chatbot, system_prompt])
213
 
 
 
 
 
 
 
 
214
  demo.queue().launch()
llm.py CHANGED
@@ -1,12 +1,51 @@
1
  from abc import ABC, abstractmethod
2
  from typing import Type, TypeVar
3
  import base64
 
4
  import json
 
 
 
 
5
 
6
  # constants
7
- image_embed_prefix = "🖼️🆙 "
8
  log_to_console = False
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def encode_image(image_data):
11
  """Generates a prefix for image base64 data in the required format for the
12
  four known image formats: png, jpeg, gif, and webp.
@@ -42,6 +81,38 @@ def encode_image(image_data):
42
  "media_type": "image/" + image_type,
43
  "data": base64.b64encode(image_data).decode('utf-8')}
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  LLMClass = TypeVar('LLMClass', bound='LLM')
46
  class LLM(ABC):
47
  @abstractmethod
@@ -68,26 +139,25 @@ class Claude(LLM):
68
  user_msg_parts = []
69
  for human, assi in history:
70
  if human:
71
- if human.startswith(image_embed_prefix):
72
- with open(human.lstrip(image_embed_prefix), mode="rb") as f:
73
- content = f.read()
74
- user_msg_parts.append({"type": "image",
75
- "source": encode_image(content)})
76
  else:
77
  user_msg_parts.append({"type": "text", "text": human})
78
 
79
- if assi:
80
  if user_msg_parts:
81
  history_claude_format.append({"role": "user", "content": user_msg_parts})
82
  user_msg_parts = []
83
 
84
  history_claude_format.append({"role": "assistant", "content": assi})
85
 
86
- if message:
87
- user_msg_parts.append({"type": "text", "text": human})
88
-
89
- if user_msg_parts:
90
- history_claude_format.append({"role": "user", "content": user_msg_parts})
 
 
91
 
92
  if log_to_console:
93
  print(f"br_prompt: {str(history_claude_format)}")
@@ -111,12 +181,25 @@ class Mistral(LLM):
111
  def generate_body(message, history, system_prompt, temperature, max_tokens):
112
  prompt = "<s>"
113
  for human, assi in history:
114
- if prompt is not None:
115
- prompt += f"[INST] {human} [/INST]\n"
 
 
 
 
116
  if assi is not None:
117
- prompt += f"{assi}</s>\n"
118
- if message:
119
- prompt += f"[INST] {message} [/INST]"
 
 
 
 
 
 
 
 
 
120
 
121
  if log_to_console:
122
  print(f"br_prompt: {str(prompt)}")
 
1
  from abc import ABC, abstractmethod
2
  from typing import Type, TypeVar
3
  import base64
4
+ import os
5
  import json
6
+ from doc2json import process_docx
7
+ import fitz
8
+ from PIL import Image
9
+ import io
10
 
11
  # constants
 
12
  log_to_console = False
13
 
14
+ def process_pdf_img(pdf_fn: str):
15
+ pdf = fitz.open(pdf_fn)
16
+ message_parts = []
17
+
18
+ for page in pdf.pages():
19
+ # Create a transformation matrix for rendering at the calculated scale
20
+ mat = fitz.Matrix(0.6, 0.6)
21
+
22
+ # Render the page to a pixmap
23
+ pix = page.get_pixmap(matrix=mat, alpha=False)
24
+
25
+ # Convert pixmap to PIL Image
26
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
27
+
28
+ # Convert PIL Image to bytes
29
+ img_byte_arr = io.BytesIO()
30
+ img.save(img_byte_arr, format='PNG')
31
+ img_byte_arr = img_byte_arr.getvalue()
32
+
33
+ # Encode image to base64
34
+ base64_encoded = base64.b64encode(img_byte_arr).decode('utf-8')
35
+
36
+ # Append the message part
37
+ message_parts.append({
38
+ "type": "text",
39
+ "text": f"Page {page.number} of file '{pdf_fn}'"
40
+ })
41
+ message_parts.append({"type": "image", "source": {"type": "base64",
42
+ "media_type": "image/png",
43
+ "data": base64_encoded}})
44
+
45
+ pdf.close()
46
+
47
+ return message_parts
48
+
49
  def encode_image(image_data):
50
  """Generates a prefix for image base64 data in the required format for the
51
  four known image formats: png, jpeg, gif, and webp.
 
81
  "media_type": "image/" + image_type,
82
  "data": base64.b64encode(image_data).decode('utf-8')}
83
 
84
+ def encode_file(fn: str) -> list:
85
+ user_msg_parts = []
86
+
87
+ if fn.endswith(".docx"):
88
+ user_msg_parts.append({"type": "text", "text": process_docx(fn)})
89
+ elif fn.endswith(".pdf"):
90
+ user_msg_parts.extend(process_pdf_img(fn))
91
+ else:
92
+ with open(fn, mode="rb") as f:
93
+ content = f.read()
94
+
95
+ isImage = False
96
+ if isinstance(content, bytes):
97
+ try:
98
+ # try to add as image
99
+ content = encode_image(content)
100
+ isImage = True
101
+ except:
102
+ # not an image, try text
103
+ content = content.decode('utf-8', 'replace')
104
+ else:
105
+ content = str(content)
106
+
107
+ if isImage:
108
+ user_msg_parts.append({"type": "image",
109
+ "source": content})
110
+ else:
111
+ fname = os.path.basename(fn)
112
+ user_msg_parts.append({"type": "text", "text": f"``` {fname}\n{content}\n```"})
113
+
114
+ return user_msg_parts
115
+
116
  LLMClass = TypeVar('LLMClass', bound='LLM')
117
  class LLM(ABC):
118
  @abstractmethod
 
139
  user_msg_parts = []
140
  for human, assi in history:
141
  if human:
142
+ if type(human) is tuple:
143
+ user_msg_parts.extend(encode_file(human[0]))
 
 
 
144
  else:
145
  user_msg_parts.append({"type": "text", "text": human})
146
 
147
+ if assi is not None:
148
  if user_msg_parts:
149
  history_claude_format.append({"role": "user", "content": user_msg_parts})
150
  user_msg_parts = []
151
 
152
  history_claude_format.append({"role": "assistant", "content": assi})
153
 
154
+ if message['text']:
155
+ user_msg_parts.append({"type": "text", "text": message['text']})
156
+ if message['files']:
157
+ for file in message['files']:
158
+ user_msg_parts.extend(encode_file(file['path']))
159
+ history_claude_format.append({"role": "user", "content": user_msg_parts})
160
+ user_msg_parts = []
161
 
162
  if log_to_console:
163
  print(f"br_prompt: {str(history_claude_format)}")
 
181
  def generate_body(message, history, system_prompt, temperature, max_tokens):
182
  prompt = "<s>"
183
  for human, assi in history:
184
+ if human:
185
+ if type(human) is tuple:
186
+ prompt += f"[INST] {encode_file(human[0])} [/INST]"
187
+ else:
188
+ prompt += f"[INST] {human} [/INST]"
189
+
190
  if assi is not None:
191
+ prompt += f"{assi}</s>"
192
+
193
+ if message['text'] or message['files']:
194
+ prompt += "[INST] "
195
+
196
+ if message['text']:
197
+ prompt += message['text']
198
+ if message['files']:
199
+ for file in message['files']:
200
+ prompt += f"{encode_file(file['path'])}\n"
201
+
202
+ prompt += " [/INST]"
203
 
204
  if log_to_console:
205
  print(f"br_prompt: {str(prompt)}")
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
- gradio>=4.1
2
  langchain
3
  boto3>1.34.54
4
- lxml
 
 
1
+ gradio>=4.38.1
2
  langchain
3
  boto3>1.34.54
4
+ lxml
5
+ PyMuPDF