"""Streamlit app: American Sign Language letter recognition.

Loads a pre-trained feed-forward classifier and sample rows of the ASL
dataset from Hugging Face, displays example images, and classifies
user-uploaded images into letters A-Z.
"""

import pickle

import pandas as pd
import requests
import streamlit as st
import torch
import torch.nn as nn
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

# Run on GPU when one is available; model and inputs are moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
st.write(f"Enabled GPU = {torch.cuda.is_available()}")

MODEL_REPO_ID = "louiecerv/amer_sign_lang_neuralnet"
DATASET_REPO_ID = "louiecerv/american_sign_language"

# Shared preprocessing: 28x28 grayscale, normalized to [-1, 1].
# Defined once so the dataset pipeline and the prediction tab cannot
# drift apart (the original duplicated this Compose in two places).
IMAGE_TRANSFORM = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])


@st.cache_data
def load_dataset_from_api(repo_id, split, offset, length):
    """Fetch `length` rows of `split` from the HF datasets-server rows API.

    Args:
        repo_id: dataset repository id, e.g. "user/dataset".
        split: split name ("train", "validation", ...).
        offset: index of the first row to fetch.
        length: number of rows to fetch.

    Returns:
        pd.DataFrame with one column per dataset feature.

    Raises:
        requests.HTTPError: when the API call fails.
    """
    url = (
        "https://datasets-server.huggingface.co/rows"
        f"?dataset={repo_id}&config=default&split={split}"
        f"&offset={offset}&length={length}"
    )
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    payload = response.json()
    # BUG FIX: the API returns {"features": [...], "rows": [{"row_idx": i,
    # "row": {...}}, ...]}. pd.DataFrame(payload) built a frame of the
    # envelope keys, not the samples. Unwrap the nested "row" dicts.
    return pd.DataFrame([item["row"] for item in payload.get("rows", [])])


train_data = load_dataset_from_api(DATASET_REPO_ID, "train", 0, 1000)
valid_data = load_dataset_from_api(DATASET_REPO_ID, "validation", 0, 100)


class SimpleNN(nn.Module):
    """Two-hidden-layer MLP over flattened 28x28 images (26 classes).

    The architecture must match the training code so the pickled
    weights deserialize into a compatible object.
    """

    def __init__(self, input_size=28 * 28, hidden_size=512, num_classes=26):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x


@st.cache_resource
def load_model():
    """Download the pickled model from Hugging Face and prepare it for inference.

    Returns the model moved to `device` and set to eval mode.

    NOTE(security): pickle.load executes arbitrary code embedded in the
    downloaded file. Keep this only if MODEL_REPO_ID is trusted;
    otherwise prefer torch.load(..., weights_only=True) on a state_dict.
    """
    url = f"https://huggingface.co./{MODEL_REPO_ID}/resolve/main/trained_model.pkl"
    response = requests.get(url, timeout=60)
    # BUG FIX: without this check a 404/500 HTML page was written to disk
    # and then fed to pickle.load, producing a confusing UnpicklingError.
    response.raise_for_status()
    with open("trained_model.pkl", "wb") as f:
        f.write(response.content)
    with open("trained_model.pkl", "rb") as f:
        model = pickle.load(f)
    model.to(device)
    model.eval()  # inference only: disable dropout/batch-norm updates
    return model


model = load_model()


class ASLDataset(Dataset):
    """Dataset over a DataFrame with 'image' (URL or rows-API dict) and 'label'."""

    def __init__(self, data):
        self.data = data
        self.transform = IMAGE_TRANSFORM

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.iloc[idx]
        image_field = sample['image']
        # BUG FIX: the datasets-server rows API serializes images as
        # {"src": <url>, "height": ..., "width": ...}; the original code
        # assumed a bare URL string. Accept both shapes.
        image_url = image_field["src"] if isinstance(image_field, dict) else image_field
        response = requests.get(image_url, stream=True, timeout=30)
        response.raise_for_status()
        image = Image.open(response.raw).convert("L")
        label = sample['label']
        image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)


train_dataset = ASLDataset(train_data)
valid_dataset = ASLDataset(valid_data)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)


def predict_image(model, image):
    """Classify one preprocessed image tensor; return a letter 'A'-'Z'.

    Args:
        model: the trained classifier (moved to `device`).
        image: transformed tensor (any shape flattenable to 28*28 features).

    Returns:
        Single uppercase letter corresponding to the argmax class.
    """
    image = image.view(-1, 28 * 28).to(device)  # flatten to match model input
    model.eval()
    with torch.no_grad():
        output = model(image)
        predicted_class = torch.argmax(output, dim=1).item()
    return chr(predicted_class + 65)  # class index 0 -> 'A'


# Streamlit App
# NOTE(review): title says "CNN" but the loaded model is a fully-connected
# network (SimpleNN). Kept verbatim as it is a user-facing string.
st.title("American Sign Language Recognition Using CNN")

# Tabs: Dataset, Model, Prediction
tab1, tab2, tab3 = st.tabs(["Dataset", "Model", "Prediction"])

# Dataset Tab
with tab1:
    st.header("Dataset Overview")
    st.write("Displaying sample images from the training dataset.")
    columns = st.columns(5)
    for i, column in enumerate(columns):
        image, label = train_dataset[i]
        img = transforms.ToPILImage()(image.cpu())
        with column:
            st.image(img, caption=f"Label: {chr(label.item() + 65)}", use_container_width=True)

with tab2:
    st.header("Model Training")
    st.write("Training is done offline. The pre-trained model is loaded from Hugging Face.")
    st.write(model)

with tab3:
    st.header("Make a Prediction")
    uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
    if uploaded_file is not None:
        image = Image.open(uploaded_file).convert("L")
        st.image(image, caption="Original Image", use_container_width=True)
        processed_image = IMAGE_TRANSFORM(image)
        predicted_class = predict_image(model, processed_image)
        st.success(f"Predicted ASL Letter: {predicted_class}")