mylesgoose committed
Commit a75702a
Parent(s): d056675

Update README.md

Files changed (1): README.md (+371, -0)

README.md
---
license: other
license_name: other
license_link: https://ai.meta.com/llama/license
---
To load the model, you can do something like the following: copy the code below into a Python file and run it. Load an image, type a message in the entry box at the top, and press Enter:
```python
import torch
from datetime import date
from PIL import Image, ImageTk
from transformers import MllamaForConditionalGeneration, AutoProcessor
import tkinter as tk
from tkinter import filedialog, ttk, messagebox
import logging
import json
import os

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Get today's date
date_string: str = date.today().strftime("%d %b %Y")
model_id = "mylesgoose/Llama-3.2-11B-Vision-Instruct"

# Load the model and processor
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)


class Application(tk.Frame):
    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack(fill="both", expand=True)
        self.current_images = []   # Images for the current message
        self.chat_sessions = {}    # Dictionary to hold multiple chat sessions
        self.active_session = "Session 1"
        self.create_widgets()
        self.update_status("Application started.")

    def create_widgets(self):
        # Create a style for ttk widgets
        style = ttk.Style()
        style.configure('TButton', font=('Helvetica', 10))
        style.configure('TLabel', font=('Helvetica', 10))
        style.configure('TNotebook.Tab', font=('Helvetica', 10))

        # Create a menu bar
        menu_bar = tk.Menu(self.master)
        self.master.config(menu=menu_bar)

        # Create the File menu
        file_menu = tk.Menu(menu_bar, tearoff=0)
        menu_bar.add_cascade(label="File", menu=file_menu)
        file_menu.add_command(label="New Session", command=self.create_new_session)
        file_menu.add_command(label="Load Session", command=self.load_chat_session)
        file_menu.add_command(label="Save Session", command=self.save_current_chat)
        file_menu.add_separator()
        file_menu.add_command(label="Exit", command=self.on_closing)

        # Create a Notebook for multiple sessions
        self.notebook = ttk.Notebook(self)
        self.notebook.pack(side="top", fill="both", expand=True)
        self.notebook.bind("<<NotebookTabChanged>>", self.change_session)

        # Initialize the first session
        self.create_new_session()

        # Status bar
        self.status_bar = ttk.Label(self, text="Status: Ready", anchor="w")
        self.status_bar.pack(side="bottom", fill="x")

    def create_new_session(self, session_name=None):
        if not session_name:
            session_name = f"Session {len(self.chat_sessions) + 1}"
        frame = ttk.Frame(self.notebook)
        self.notebook.add(frame, text=session_name)
        self.chat_sessions[session_name] = {
            "frame": frame,
            "chat_history": [],
            "widgets": {}
        }
        self.active_session = session_name
        self.build_session_widgets(frame, session_name)

    def build_session_widgets(self, frame, session_name):
        widgets = {}

        # Text Entry
        widgets['text_entry_label'] = ttk.Label(frame, text="Enter your message:")
        widgets['text_entry_label'].pack(side="top", anchor="w", padx=10, pady=(10, 0))

        widgets['text_entry'] = tk.Text(frame, height=5, width=80)
        widgets['text_entry'].pack(side="top", fill="x", padx=10, pady=5)
        widgets['text_entry'].bind("<Return>", self.generate_text_from_entry)

        # Buttons Frame
        widgets['buttons_frame'] = ttk.Frame(frame)
        widgets['buttons_frame'].pack(side="top", fill="x", padx=10, pady=5)

        widgets['load_image_button'] = ttk.Button(widgets['buttons_frame'], text="Load Image", command=self.load_image)
        widgets['load_image_button'].pack(side="left", padx=5)

        widgets['remove_image_button'] = ttk.Button(widgets['buttons_frame'], text="Remove Images", command=self.remove_images)
        widgets['remove_image_button'].pack(side="left", padx=5)

        widgets['generate_text_button'] = ttk.Button(widgets['buttons_frame'], text="Send", command=self.generate_text)
        widgets['generate_text_button'].pack(side="left", padx=5)

        widgets['reset_button'] = ttk.Button(widgets['buttons_frame'], text="Reset Chat", command=self.reset_chat)
        widgets['reset_button'].pack(side="left", padx=5)

        widgets['save_chat_button'] = ttk.Button(widgets['buttons_frame'], text="Save Chat", command=self.save_current_chat)
        widgets['save_chat_button'].pack(side="left", padx=5)

        # Chat History
        widgets['chat_history_frame'] = ttk.Frame(frame)
        widgets['chat_history_frame'].pack(side="top", fill="both", expand=True, padx=10, pady=5)

        widgets['chat_history_canvas'] = tk.Canvas(widgets['chat_history_frame'])
        widgets['chat_history_canvas'].pack(side="left", fill="both", expand=True)

        widgets['chat_history_scrollbar'] = ttk.Scrollbar(widgets['chat_history_frame'], orient="vertical", command=widgets['chat_history_canvas'].yview)
        widgets['chat_history_scrollbar'].pack(side="right", fill="y")

        widgets['chat_history_canvas'].configure(yscrollcommand=widgets['chat_history_scrollbar'].set)
        widgets['chat_history_container'] = ttk.Frame(widgets['chat_history_canvas'])
        widgets['chat_history_canvas'].create_window((0, 0), window=widgets['chat_history_container'], anchor='nw')

        widgets['chat_history_container'].bind("<Configure>", lambda event: widgets['chat_history_canvas'].configure(scrollregion=widgets['chat_history_canvas'].bbox("all")))

        self.chat_sessions[session_name]['widgets'] = widgets

    def change_session(self, event):
        selected_tab = event.widget.select()
        self.active_session = event.widget.tab(selected_tab, "text")
        self.update_status(f"Switched to {self.active_session}")

    def update_status(self, message):
        self.status_bar.config(text=f"Status: {message}")
        logging.info(message)

    def load_image(self):
        image_paths = filedialog.askopenfilenames()
        for image_path in image_paths:
            image = Image.open(image_path)
            image.thumbnail((100, 100))
            photo = ImageTk.PhotoImage(image)
            label = tk.Label(self.chat_sessions[self.active_session]['widgets']['chat_history_container'], image=photo)
            label.image = photo
            label.pack(side="top", anchor="w", padx=5, pady=5)
            self.current_images.append({'image': image, 'path': image_path})
        self.update_status(f"Loaded {len(image_paths)} image(s).")

    def remove_images(self):
        self.current_images = []
        self.update_status("All images removed from the current message.")

    def generate_text(self, event=None):
        user_text = self.chat_sessions[self.active_session]['widgets']['text_entry'].get("1.0", tk.END).strip()
        if not user_text and not self.current_images:
            self.update_status("Please enter a message or load images.")
            return

        # Display user's message and images in chat history
        self.display_message("User", user_text, self.current_images)

        session_data = self.chat_sessions[self.active_session]

        # Prepare message content
        message_content = []

        if self.current_images:
            message_content.append({"type": "image"})
        # Add the text content
        message_content.append({"type": "text", "text": user_text})

        # Append the message to the chat history, including image paths
        session_data['chat_history'].append({
            "role": "user",
            "content": message_content,
            "images": [img['path'] for img in self.current_images]  # Store image paths
        })

        # Build messages for the processor (system prompt as a text content block)
        messages = [{"role": message["role"], "content": message["content"]} for message in session_data['chat_history']] + \
                   [{"role": "system", "content": [{"type": "text", "text": "You are a helpful and creative AI assistant."}]}]

        try:
            # Generate the input text for the processor
            input_text = processor.apply_chat_template(messages, add_generation_prompt=True, date_string=date_string)

            # Build all_images by collecting images from chat history
            all_images = []
            for message in session_data['chat_history']:
                if 'images' in message and message['images']:
                    for img_path in message['images']:
                        try:
                            img = Image.open(img_path)
                            all_images.append(img)
                        except Exception as e:
                            logging.error(f"Error loading image {img_path}: {e}")
                            self.update_status(f"Error loading image {img_path}")

            # Ensure the number of images matches the number of image tokens
            total_image_tokens = input_text.count(processor.image_token)
            if total_image_tokens != len(all_images):
                self.update_status(f"Mismatch between image tokens ({total_image_tokens}) and images provided ({len(all_images)}).")
                return

            # Prepare inputs for the model
            inputs = processor(images=all_images, text=input_text, return_tensors="pt").to(model.device)

            # Generate the assistant's response
            output = model.generate(**inputs, max_new_tokens=1000)
            generated_text = processor.decode(output[0][inputs['input_ids'].shape[-1]:])

            # Update chat history and UI with the assistant's response
            session_data['chat_history'].append({
                "role": "assistant",
                "content": [{"type": "text", "text": generated_text}],
                "images": []
            })
            self.display_message("Assistant", generated_text)

            # Clear the text entry and current images
            self.chat_sessions[self.active_session]['widgets']['text_entry'].delete("1.0", tk.END)
            self.current_images = []

        except Exception as e:
            logging.error(f"Error during text generation: {e}")
            self.update_status("An error occurred during text generation.")

    def display_message(self, sender, text, images=[]):
        container = self.chat_sessions[self.active_session]['widgets']['chat_history_container']
        frame = ttk.Frame(container)
        frame.pack(fill="x", pady=5)

        label = ttk.Label(frame, text=f"{sender}:", font=('Helvetica', 10, 'bold'))
        label.pack(side="top", anchor="w")

        if images:
            images_frame = ttk.Frame(frame)
            images_frame.pack(side="top", fill="x")
            for img_item in images:
                if isinstance(img_item, dict):
                    img = img_item['image']
                elif isinstance(img_item, str):
                    try:
                        img = Image.open(img_item)
                    except Exception as e:
                        logging.error(f"Error loading image {img_item}: {e}")
                        self.update_status(f"Error loading image {img_item}")
                        continue
                else:
                    img = img_item
                image = img.copy()
                image.thumbnail((100, 100))
                photo = ImageTk.PhotoImage(image)
                img_label = ttk.Label(images_frame, image=photo)
                img_label.image = photo
                img_label.pack(side="left", padx=5)

        message_label = ttk.Label(frame, text=text, wraplength=500, justify="left")
        message_label.pack(side="top", anchor="w")

        # Scroll to the bottom
        canvas = self.chat_sessions[self.active_session]['widgets']['chat_history_canvas']
        canvas.update_idletasks()
        canvas.yview_moveto(1.0)

    def generate_text_from_entry(self, event=None):
        self.generate_text()
        return "break"  # Prevents the Text widget from inserting a newline

    def reset_chat(self):
        confirm = messagebox.askyesno("Reset Chat", "Are you sure you want to reset the chat?")
        if confirm:
            session_data = self.chat_sessions[self.active_session]
            session_data['chat_history'] = []
            self.current_images = []

            # Clear chat history UI
            container = session_data['widgets']['chat_history_container']
            for widget in container.winfo_children():
                widget.destroy()

            self.update_status("Chat reset.")

    def save_current_chat(self):
        session_data = self.chat_sessions[self.active_session]
        if not session_data['chat_history']:
            messagebox.showinfo("Save Chat", "No chat history to save.")
            return
        filename = filedialog.asksaveasfilename(defaultextension=".json", initialfile=f"{self.active_session}.json", filetypes=[("JSON files", "*.json")])
        if filename:
            self.save_chat_history(filename)
            self.update_status(f"Chat history saved to {filename}")

    def save_chat_history(self, filename):
        session_data = self.chat_sessions[self.active_session]
        with open(filename, "w") as f:
            json.dump(session_data['chat_history'], f)

    def load_chat_session(self):
        filename = filedialog.askopenfilename(defaultextension=".json", filetypes=[("JSON files", "*.json")])
        if filename:
            session_name = os.path.splitext(os.path.basename(filename))[0]
            self.create_new_session(session_name)
            self.load_chat_history(filename, session_name)
            self.update_status(f"Chat session {session_name} loaded.")

    def load_chat_history(self, filename, session_name):
        with open(filename, "r") as f:
            chat_history = json.load(f)
        session_data = self.chat_sessions[session_name]
        session_data['chat_history'] = chat_history
        # Update UI with loaded chat history
        for message in chat_history:
            sender = message['role'].capitalize()
            content = message['content']
            images = []
            if 'images' in message and message['images']:
                images = message['images']
            text = ""
            for item in content:
                if item.get('type') == 'text':
                    text = item.get('text', '')
                    break
            self.display_message(sender, text, images)

    def on_closing(self):
        if messagebox.askokcancel("Quit", "Do you want to quit?"):
            self.master.destroy()


root = tk.Tk()
root.title("LLM Chat Application")
app = Application(master=root)
root.protocol("WM_DELETE_WINDOW", app.on_closing)
app.mainloop()
```
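
If you only want to sanity-check the model without the Tk GUI, a minimal single-turn sketch could look like the one below. It reuses the same loading pattern as the full example; the image path `test.jpg`, the prompt text, and `max_new_tokens` are placeholder assumptions, not part of the original script.

```python
# Minimal single-turn sketch (no GUI). The image path and prompt are placeholders.
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

model_id = "mylesgoose/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_id)

image = Image.open("test.jpg")  # hypothetical local image
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image in one sentence."},
    ]},
]
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=[image], text=input_text, return_tensors="pt").to(model.device)

output = model.generate(**inputs, max_new_tokens=128)
print(processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```
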
Repairing the chat template for the model.
There is a slight problem with the original Llama 3.1/3.2 chat template. If you train a model with that chat template and the training script builds its prompts from a JSON file using the template, the model starts to output `<|eot_id|><|start_header_id|>assistant<|end_header_id|>` as its first token, and the script then naturally halts generation.
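
A quick way to see what a given template actually produces is to render a short conversation and inspect the special tokens around the assistant turn. The snippet below is a rough sketch against the repaired template shipped in this repository; the header/footer strings it looks for are the standard Llama 3.x ones and may differ if the template is customized further.

```python
# Rough sketch: render the chat template and inspect the tokens around the assistant turn.
# Assumes the repaired template shipped with this repository.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("mylesgoose/Llama-3.2-11B-Vision-Instruct")

messages = [
    {"role": "user", "content": [{"type": "text", "text": "Hello!"}]},
    {"role": "assistant", "content": [{"type": "text", "text": "Hi there."}]},
]

# Training-style rendering: each assistant turn should be closed by a single <|eot_id|>,
# not opened with one.
rendered = processor.apply_chat_template(messages)
print(repr(rendered))

# Inference-style rendering: the prompt should end with an open assistant header, so the
# model's first generated token is content rather than <|eot_id|>.
prompt = processor.apply_chat_template(messages[:1], add_generation_prompt=True)
print(repr(prompt[-80:]))
```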