Spaces:

crobbi
/

LipNet

Runtime error

App Files Files Community

crobbi committed on Aug 23, 2023

Commit

0474dc3

1 Parent(s): 4791e88

Delete app

Browse files

Files changed (5) hide show

app/animation.gif +0 -0
app/modelutil.py +0 -34
app/streamlitapp.py +0 -60
app/test_video.mp4 +0 -0
app/utils.py +0 -52

app/animation.gif DELETED Viewed

Binary file (445 kB)

app/modelutil.py DELETED Viewed

@@ -1,34 +0,0 @@
-from tensorflow.python.ops.numpy_ops import np_config
-np_config.enable_numpy_behavior()
-import os
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
-def load_model() -> Sequential:
-    """Assemble the Conv3D + bidirectional-LSTM network and restore its weights.
-
-    Returns:
-        The ``Sequential`` model after ``load_weights`` has been called on
-        ``../models/checkpoint`` (resolved against the working directory).
-    """
-    network = Sequential()
-    # First convolution stage: the only layer that declares the input shape.
-    network.add(Conv3D(128, 3, input_shape=(75,46,140,1), padding='same'))
-    network.add(Activation('relu'))
-    network.add(MaxPool3D((1,2,2)))
-    # Remaining convolution stages share the same Conv3D -> ReLU -> MaxPool3D layout.
-    for filters in (256, 75):
-        network.add(Conv3D(filters, 3, padding='same'))
-        network.add(Activation('relu'))
-        network.add(MaxPool3D((1,2,2)))
-    # Flatten is wrapped in TimeDistributed so the recurrent layers still
-    # receive a sequence.
-    network.add(TimeDistributed(Flatten()))
-    # Two identical bidirectional LSTM stages, each followed by dropout.
-    for _ in range(2):
-        network.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
-        network.add(Dropout(.5))
-    network.add(Dense(41, kernel_initializer='he_normal', activation='softmax'))
-    weights_path = os.path.join('..','models','checkpoint')
-    network.load_weights(weights_path)
-    return network

app/streamlitapp.py DELETED Viewed

@@ -1,60 +0,0 @@
-# Streamlit front end: pick a clip from ../data/s1, show it as mp4, show the
-# preprocessed frames as a GIF, then run the model and display the raw tokens
-# and the decoded text.
-# Import all of the dependencies
-import streamlit as st
-import os
-import subprocess
-import imageio
-import numpy as np
-import tensorflow as tf
-from utils import load_data, num_to_char
-from modelutil import load_model
-# Set the layout to the streamlit app as wide
-st.set_page_config(layout='wide')
-# Setup the sidebar
-with st.sidebar:
-    st.image('https://plus.unsplash.com/premium_photo-1682309676673-392c56015c5c?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1000&q=80')
-    st.title('Lip Reading')
-    st.info('This application is originally developed from the LipNet deep learning model.')
-st.title('LipNet using StreamLit ✌🏻')
-# Generating a list of options or videos
-options = os.listdir(os.path.join('..', 'data', 's1'))
-selected_video = st.selectbox('Choose video', options)
-# Generate two columns
-col1, col2 = st.columns(2)
-if options:
-    # Rendering the video
-    with col1:
-        st.info('The video below displays the converted video in mp4 format')
-        file_path = os.path.join('..','data','s1', selected_video)
-        # Pass the arguments as a list (no shell) so a file name containing
-        # spaces or shell metacharacters cannot break or alter the command.
-        # As with the previous os.system call, a non-zero ffmpeg exit status
-        # is not treated as an error; unlike os.system, a missing ffmpeg
-        # executable raises FileNotFoundError.
-        subprocess.run(
-            ['ffmpeg', '-i', file_path, '-vcodec', 'libx264', 'test_video.mp4', '-y'],
-            check=False,
-        )
-        # Rendering inside of the app; the context manager closes the file
-        # handle once the bytes have been read.
-        with open('test_video.mp4', 'rb') as video_file:
-            video_bytes = video_file.read()
-        st.video(video_bytes)
-    with col2:
-        st.info('👀 This is all the machine learning model sees when making a prediction')
-        video, annotations,image_data = load_data(tf.convert_to_tensor(file_path))
-        # NOTE(review): `.astype` on the value returned by load_data presumably
-        # relies on np_config.enable_numpy_behavior() executed when modelutil
-        # is imported - confirm before reordering or removing that import.
-        imageio.mimsave('animation.gif',np.squeeze((video * 50).astype(np.uint8)) , duration=100)
-        st.image('animation.gif', width=400)
-        st.info('This is the output of the machine learning model as tokens')
-        model = load_model()
-        # The model expects a leading batch axis, so add one for the single clip.
-        yhat = model.predict(tf.expand_dims(video, axis=0))
-        # Greedy CTC decoding with a sequence length of 75 for the single clip.
-        decoder = tf.keras.backend.ctc_decode(yhat, [75], greedy=True)[0][0].numpy()
-        st.text(decoder)
-        # Convert prediction to text
-        st.info('Decode the raw tokens into words')
-        converted_prediction = tf.strings.reduce_join(num_to_char(decoder)).numpy().decode('utf-8')
-        st.text(converted_prediction)

app/test_video.mp4 DELETED Viewed

Binary file (110 kB)

app/utils.py DELETED Viewed

@@ -1,52 +0,0 @@
-import tensorflow as tf
-from typing import List
-import numpy as np
-import cv2
-import os
-# Characters the lookup layers are built from: lowercase letters, three
-# punctuation marks, the digits 1-9 and the space character.
-vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")
-# Mapping characters to integer ids
-char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")
-# Mapping integers back to original characters
-num_to_char = tf.keras.layers.StringLookup(
-    vocabulary=char_to_num.get_vocabulary(),
-    oov_token="",
-    invert=True,
-)
-def load_video(path:str) -> tf.Tensor:
-    """Read a video, crop every frame to a fixed window and standardise it.
-
-    Args:
-        path: Location of the video file handed to ``cv2.VideoCapture``.
-
-    Returns:
-        A float32 tensor of the cropped grayscale frames with the mean
-        subtracted and the result divided by the standard deviation.
-    """
-    cap = cv2.VideoCapture(path)
-    frames = []
-    try:
-        for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
-            ret, frame = cap.read()
-            if not ret:
-                # cap.read() reports failure through `ret`; stop here instead
-                # of passing an invalid frame to rgb_to_grayscale.
-                break
-            frame = tf.image.rgb_to_grayscale(frame)
-            # Keep rows 190-235 and columns 80-219 of each frame.
-            frames.append(frame[190:236,80:220,:])
-    finally:
-        # Release the capture handle even if decoding a frame raises.
-        cap.release()
-    # NOTE(review): the mean and the subtraction below are evaluated before the
-    # cast to float32, i.e. in the frames' original dtype (presumably uint8
-    # from OpenCV - confirm). Deliberately left unchanged: the saved model
-    # weights were presumably produced with exactly this preprocessing.
-    mean = tf.math.reduce_mean(frames)
-    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
-    return tf.cast((frames - mean), tf.float32) / std
-def load_alignments(path:str) -> tf.Tensor:
-    """Convert an alignment file into a sequence of character ids.
-
-    The third whitespace-separated field of every line is taken as a word.
-    Lines whose word is ``'sil'`` are dropped, and lines with fewer than three
-    fields (for example a trailing blank line) are skipped.
-
-    Args:
-        path: Location of the alignment text file.
-
-    Returns:
-        The ``char_to_num`` ids of the characters of all kept words, with a
-        single space between consecutive words.
-    """
-    tokens = []
-    with open(path, 'r') as f:
-        for line in f:
-            fields = line.split()
-            if len(fields) < 3:
-                # Malformed or empty line: there is no word field to read.
-                continue
-            if fields[2] != 'sil':
-                # Every word is preceded by a space token; the very first
-                # space is removed again by the [1:] slice below.
-                tokens.extend((' ', fields[2]))
-    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]
-def load_data(path: str):
-    """Load the frames and the alignment that belong to one clip.
-
-    Args:
-        path: Path of the clip. Despite the annotation, the value must offer
-            ``.numpy()`` returning bytes (the caller passes a string tensor).
-            Only the file name without its extension is used.
-
-    Returns:
-        A tuple ``(frames, alignments, image_data)``: the output of
-        ``load_video``, the output of ``load_alignments`` and the frames
-        scaled by 255, converted to uint8 and squeezed.
-    """
-    path = bytes.decode(path.numpy())
-    # Accept both '/' and '\\' as separators. Previously the '\\'-based result
-    # unconditionally replaced the '/'-based one, which produced an empty file
-    # name for POSIX-style paths such as '../data/s1/clip.mpg'.
-    file_name = path.replace('\\', '/').split('/')[-1].split('.')[0]
-    video_path = os.path.join('..','data','s1',f'{file_name}.mpg')
-    alignment_path = os.path.join('..','data','alignments','s1',f'{file_name}.align')
-    frames = load_video(video_path)
-    print(frames.shape)
-    alignments = load_alignments(alignment_path)
-    # NOTE(review): `.astype` on the load_video result presumably needs
-    # TensorFlow's NumPy behaviour to be enabled elsewhere before this call -
-    # confirm.
-    image_data = (frames * 255).astype(np.uint8)
-    image_data = np.squeeze(image_data)
-    return frames, alignments, image_data