Spaces:

louiecerv
/

american_sign_language_neuralnet

Running

App Files Files Community

louiecerv committed 1 day ago

Commit

c0f9b0f

1 Parent(s): 146f25a

sync to remote

Browse files

Files changed (1) hide show

app.py +141 -125

app.py CHANGED Viewed

@@ -1,137 +1,153 @@
 import streamlit as st
 import torch
-import torch.nn as nn
-import pickle
-from torch.utils.data import Dataset, DataLoader
-from torchvision import transforms
 from PIL import Image
-import requests
-import pandas as pd
-# Enable GPU if available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-st.write(f"Enabled GPU = {torch.cuda.is_available()}")
 MODEL_REPO_ID = "louiecerv/amer_sign_lang_neuralnet"
-DATASET_REPO_ID = "louiecerv/american_sign_language"
-# Load dataset from Hugging Face API
-def load_dataset_from_api(repo_id, split, offset, length):
-    url = f"https://datasets-server.huggingface.co/rows?dataset={repo_id}&config=default&split={split}&offset={offset}&length={length}"
-    response = requests.get(url)
-    data = response.json()
-    df = pd.DataFrame(data)
-    return df
-train_data = load_dataset_from_api(DATASET_REPO_ID, "train", 0, 1000)
-valid_data = load_dataset_from_api(DATASET_REPO_ID, "validation", 0, 100)
-# Define the model architecture (must match training code)
-class SimpleNN(nn.Module):
-    def __init__(self, input_size=28*28, hidden_size=512, num_classes=26):
-        super(SimpleNN, self).__init__()
-        self.flatten = nn.Flatten()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu1 = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, hidden_size)
-        self.relu2 = nn.ReLU()
-        self.fc3 = nn.Linear(hidden_size, num_classes)
-    def forward(self, x):
-        x = self.flatten(x)
-        x = self.fc1(x)
-        x = self.relu1(x)
-        x = self.fc2(x)
-        x = self.relu2(x)
-        x = self.fc3(x)
-        return x
-# Load pre-trained model from Hugging Face (pickle file)
-@st.cache_resource
-def load_model():
-    url = f"https://huggingface.co/{MODEL_REPO_ID}/resolve/main/trained_model.pkl"
-    response = requests.get(url)
-    with open("trained_model.pkl", "wb") as f:
-        f.write(response.content)
-    with open("trained_model.pkl", "rb") as f:
-        model = pickle.load(f)
-    model.to(device)
-    model.eval()  # Set model to evaluation mode
-    return model
-model = load_model()
-# Custom dataset class
-class ASLDataset(Dataset):
-    def __init__(self, data):
-        self.data = data
-        self.transform = transforms.Compose([
             transforms.Grayscale(num_output_channels=1),
             transforms.Resize((28, 28)),
             transforms.ToTensor(),
-            transforms.Normalize(mean=[0.5], std=[0.5])
         ])
-    def __len__(self):
-        return len(self.data)
-    def __getitem__(self, idx):
-        sample = self.data.iloc[idx]
-        image_path = sample['image']
-        image = Image.open(requests.get(image_path, stream=True).raw).convert("L")
-        label = sample['label']
-        image = self.transform(image)
-        return image, torch.tensor(label, dtype=torch.long)
-train_dataset = ASLDataset(train_data)
-valid_dataset = ASLDataset(valid_data)
-train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
-valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
-# Prediction function
-def predict_image(model, image):
-    image = image.view(-1, 28*28).to(device)  # Flatten image to match model input
-    model.eval()
-    with torch.no_grad():
-        output = model(image)
-        predicted_class = torch.argmax(output, dim=1).item()
-    return chr(predicted_class + 65)  # Convert index to ASL letter (A-Z)
 # Streamlit App
-st.title("American Sign Language Recognition Using CNN")
-# Tabs: Dataset, Model, Prediction
-tab1, tab2, tab3 = st.tabs(["Dataset", "Model", "Prediction"])
-# Dataset Tab
-with tab1:
-    st.header("Dataset Overview")
-    st.write("Displaying sample images from the training dataset.")
-    col1, col2, col3, col4, col5 = st.columns(5)
-    for i in range(5):
-        image, label = train_dataset[i]
-        img = transforms.ToPILImage()(image.cpu())
-        with [col1, col2, col3, col4, col5][i]:
-            st.image(img, caption=f"Label: {chr(label.item() + 65)}", use_container_width=True)
-with tab2:
-    st.header("Model Training")
-    st.write("Training is done offline. The pre-trained model is loaded from Hugging Face.")
-    st.write(model)
-with tab3:
-    st.header("Make a Prediction")
-    uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
     if uploaded_file is not None:
-        image = Image.open(uploaded_file).convert("L")
-        st.image(image, caption="Original Image", use_container_width=True)
-        transform = transforms.Compose([
-                transforms.Grayscale(num_output_channels=1),
-                transforms.Resize((28, 28)),
-                transforms.ToTensor(),
-                transforms.Normalize(mean=[0.5], std=[0.5])
-            ])
-        processed_image = transform(image)
-        predicted_class = predict_image(model, processed_image)
-        st.success(f"Predicted ASL Letter: {predicted_class}")

 import streamlit as st
 import torch
+import numpy as np
 from PIL import Image
+import pickle
+import torchvision.transforms as transforms
+from huggingface_hub import hf_hub_download
+from datasets import load_dataset
+# Model repository ID
 MODEL_REPO_ID = "louiecerv/amer_sign_lang_neuralnet"
+MODEL_FILENAME = "trained_model.pkl"  # The filename of your model on Hugging Face
+# Load dataset from Hugging Face
+DATASET_NAME = "louiecerv/american_sign_language"  # Replace with your dataset name
+dataset = load_dataset(DATASET_NAME, split="train")
+def preprocess_image(image: Image) -> tuple[torch.Tensor, Image]:
+    """
+    Preprocess the image by converting it to grayscale, resizing it to 28x28,
+    normalizing the pixel values, and converting it to a tensor.
+    Args:
+        image (Image): The input image.
+    Returns:
+        tuple[torch.Tensor, Image]: A tuple containing the preprocessed image tensor and the processed PIL image.
+    """
+    try:
+        transform = transforms.Compose([
             transforms.Grayscale(num_output_channels=1),
             transforms.Resize((28, 28)),
             transforms.ToTensor(),
+            transforms.Normalize(mean=0.5, std=0.5)
         ])
+        tensor_image = transform(image)
+        # Convert the tensor back to a PIL Image for display
+        tensor_image_pil = tensor_image.squeeze().cpu().numpy()  # Remove batch dimension and convert to numpy
+        tensor_image_pil = (tensor_image_pil * 0.5 + 0.5) * 255  # Unnormalize
+        tensor_image_pil = tensor_image_pil.astype(np.uint8)  # Convert to uint8 for PIL
+        processed_image_pil = Image.fromarray(tensor_image_pil)
+        return tensor_image, processed_image_pil
+    except Exception as e:
+        st.error(f"Error preprocessing image: {e}")
+        return None, None
+def load_model(repo_id: str, filename: str) -> torch.nn.Module:
+    """
+    Load the model from Hugging Face Hub.
+    Args:
+        repo_id (str): The repository ID of the model.
+        filename (str): The filename of the model.
+    Returns:
+        torch.nn.Module: The loaded model.
+    """
+    try:
+        model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+        with open(model_path, "rb") as f:
+            model = pickle.load(f)
+        return model
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None
+def make_prediction(model: torch.nn.Module, image_tensor: torch.Tensor) -> str:
+    """
+    Make a prediction using the loaded model and the preprocessed image tensor.
+    Args:
+        model (torch.nn.Module): The loaded model.
+        image_tensor (torch.Tensor): The preprocessed image tensor.
+    Returns:
+        str: The predicted letter.
+    """
+    try:
+        model.eval()
+        with torch.no_grad():
+            # Add batch dimension if not already present
+            if len(image_tensor.shape) == 3:
+                image_tensor = image_tensor.unsqueeze(0)
+            prediction = model(image_tensor)
+        predicted_class = torch.argmax(prediction).item()
+        predicted_letter = chr(predicted_class + ord('A'))
+        return predicted_letter
+    except Exception as e:
+        st.error(f"Error making prediction: {e}")
+        return None
+def tensor_to_image(pixel_list):
+    """Converts a tensor to a displayable image."""
+    array = np.array(pixel_list).reshape(28, 28)
+    array = (array * 0.5 + 0.5) * 255  # Assuming mean=0.5, std=0.5
+    array = np.clip(array, 0, 255).astype(np.uint8)
+    return Image.fromarray(array)
 # Streamlit App
+st.title("American Sign Language App")
+# Create tabs
+tabs = ["Dataset", "Prediction"]
+selected_tab = st.sidebar.radio("Select Tab", tabs)
+if selected_tab == "Dataset":
+    st.header("Dataset")
+    st.write("Displaying the first 20 images from the dataset.")
+    # Create a grid layout
+    cols = 5  # Number of columns
+    rows = 4  # Number of rows
+    num_images = cols * rows
+    # Display images in a grid
+    image_list = dataset[:num_images]["pixel_values"]
+    labels = dataset[:num_images]["label"]
+    # Display images using Streamlit columns
+    for row in range(rows):
+        columns = st.columns(cols)
+        for col in range(cols):
+            index = row * cols + col
+            image = tensor_to_image(image_list[index])
+            columns[col].image(image, caption=f"Label: {chr(labels[index] + ord('A'))}", use_container_width=True)
+elif selected_tab == "Prediction":
+    st.header("Prediction")
+    st.write("Upload an image of an ASL letter.")
+    # File uploader
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
     if uploaded_file is not None:
+        # Load and preprocess the image
+        image = Image.open(uploaded_file).convert("RGB")  # Ensure RGB for consistent processing
+        st.image(image, caption="Uploaded Image.", use_container_width=True)
+        image_tensor, processed_image_pil = preprocess_image(image)
+        if image_tensor is not None and processed_image_pil is not None:
+            st.image(processed_image_pil, caption="Preprocessed Image.", use_container_width=True)  # Display processed image
+            # Load the model
+            model = load_model(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)
+            if model is not None:
+                # Make a prediction
+                predicted_letter = make_prediction(model, image_tensor)
+                if predicted_letter is not None:
+                    st.write(f"Predicted Letter: {predicted_letter}")