Upload 9 files
- .gitattributes +1 -0
- app/animation.gif +0 -0
- app/modelutil.py +34 -0
- app/streamlitapp.py +60 -0
- app/test_video.mp4 +0 -0
- app/utils.py +52 -0
- models/checkpoint +2 -0
- models/checkpoint.data-00000-of-00001 +3 -0
- models/checkpoint.index +0 -0
- requirements.txt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+models/checkpoint.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
app/animation.gif
ADDED
app/modelutil.py
ADDED
@@ -0,0 +1,34 @@
+from tensorflow.python.ops.numpy_ops import np_config
+np_config.enable_numpy_behavior()
+import os
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
+
+def load_model() -> Sequential:
+    model = Sequential()
+
+    model.add(Conv3D(128, 3, input_shape=(75,46,140,1), padding='same'))
+    model.add(Activation('relu'))
+    model.add(MaxPool3D((1,2,2)))
+
+    model.add(Conv3D(256, 3, padding='same'))
+    model.add(Activation('relu'))
+    model.add(MaxPool3D((1,2,2)))
+
+    model.add(Conv3D(75, 3, padding='same'))
+    model.add(Activation('relu'))
+    model.add(MaxPool3D((1,2,2)))
+
+    model.add(TimeDistributed(Flatten()))
+
+    model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
+    model.add(Dropout(.5))
+
+    model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
+    model.add(Dropout(.5))
+
+    model.add(Dense(41, kernel_initializer='he_normal', activation='softmax'))
+    # print("path", os.path.join('..','models','checkpoint'))
+    model.load_weights(os.path.join('..','models','checkpoint'))
+
+    return model
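A note on the model above: with the declared input shape of (75, 46, 140, 1), i.e. 75 frames of 46x140 grayscale mouth crops, the three MaxPool3D layers only downsample the spatial axes, so the network emits one 41-way softmax per frame. A minimal smoke test for the loader might look like the sketch below; the dummy tensor and the working-directory assumption are illustrative and not part of this commit.

# Illustrative smoke test for load_model(); assumes TensorFlow 2.x and that
# it is run from the app/ directory so '../models/checkpoint' resolves.
import numpy as np
from modelutil import load_model

model = load_model()
dummy = np.zeros((1, 75, 46, 140, 1), dtype=np.float32)  # batch of one clip
yhat = model.predict(dummy)
print(yhat.shape)  # should be (1, 75, 41): one 41-way softmax per frame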
app/streamlitapp.py
ADDED
@@ -0,0 +1,60 @@
+# Import all of the dependencies
+import streamlit as st
+import os
+import imageio
+import numpy as np
+
+import tensorflow as tf
+from utils import load_data, num_to_char
+from modelutil import load_model
+
+
+# Set the layout of the streamlit app to wide
+st.set_page_config(layout='wide')
+
+# Setup the sidebar
+with st.sidebar:
+    st.image('https://plus.unsplash.com/premium_photo-1682309676673-392c56015c5c?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1000&q=80')
+    st.title('Lip Reading')
+    st.info('This application is originally developed from the LipNet deep learning model.')
+
+st.title('LipNet using StreamLit')
+# Generate a list of video options
+options = os.listdir(os.path.join('..', 'data', 's1'))
+selected_video = st.selectbox('Choose video', options)
+
+# Generate two columns
+col1, col2 = st.columns(2)
+
+if options:
+
+    # Rendering the video
+    with col1:
+        st.info('The video below displays the converted video in mp4 format')
+        file_path = os.path.join('..', 'data', 's1', selected_video)
+        os.system(f'ffmpeg -i {file_path} -vcodec libx264 test_video.mp4 -y')
+
+        # Rendering inside of the app
+        video = open('test_video.mp4', 'rb')
+        video_bytes = video.read()
+        st.video(video_bytes)
+
+
+    with col2:
+        st.info('This is all the machine learning model sees when making a prediction')
+        video, annotations, image_data = load_data(tf.convert_to_tensor(file_path))
+        # st.text(video.shape)
+        imageio.mimsave('animation.gif', np.squeeze((video * 50).astype(np.uint8)), duration=100)
+        st.image('animation.gif', width=400)
+
+        st.info('This is the output of the machine learning model as tokens')
+        model = load_model()
+        yhat = model.predict(tf.expand_dims(video, axis=0))
+        decoder = tf.keras.backend.ctc_decode(yhat, [75], greedy=True)[0][0].numpy()
+        st.text(decoder)
+
+        # Convert prediction to text
+        st.info('Decode the raw tokens into words')
+        converted_prediction = tf.strings.reduce_join(num_to_char(decoder)).numpy().decode('utf-8')
+        st.text(converted_prediction)
+
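The inference block above decodes in two stages: tf.keras.backend.ctc_decode collapses the per-frame softmax into a sequence of token ids (greedy, with a fixed input length of 75 frames), and num_to_char from utils.py maps those ids back to characters. Factored out of the Streamlit flow, the same logic could read as the sketch below; the helper name is hypothetical and not part of this commit.

# Hypothetical helper mirroring the decode logic in streamlitapp.py.
# Assumes yhat has shape (1, 75, 41), as produced by model.predict above.
import tensorflow as tf
from utils import num_to_char

def decode_prediction(yhat):
    # Greedy CTC decode over the 75-frame sequence, batch size 1.
    tokens = tf.keras.backend.ctc_decode(yhat, [75], greedy=True)[0][0].numpy()
    # Map the token ids back to characters and join them into one string.
    return tf.strings.reduce_join(num_to_char(tokens)).numpy().decode('utf-8')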
app/test_video.mp4
ADDED
Binary file (110 kB)
app/utils.py
ADDED
@@ -0,0 +1,52 @@
+import tensorflow as tf
+from typing import List
+import numpy as np
+import cv2
+import os
+
+vocab = [x for x in "abcdefghijklmnopqrstuvwxyz'?!123456789 "]
+char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")
+# Mapping integers back to original characters
+num_to_char = tf.keras.layers.StringLookup(
+    vocabulary=char_to_num.get_vocabulary(), oov_token="", invert=True
+)
+
+def load_video(path:str) -> List[float]:
+    #print(path)
+    cap = cv2.VideoCapture(path)
+    frames = []
+    for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
+        ret, frame = cap.read()
+        frame = tf.image.rgb_to_grayscale(frame)
+        frames.append(frame[190:236, 80:220, :])
+    cap.release()
+
+    mean = tf.math.reduce_mean(frames)
+    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
+    return tf.cast((frames - mean), tf.float32) / std
+
+def load_alignments(path:str) -> List[str]:
+    #print(path)
+    with open(path, 'r') as f:
+        lines = f.readlines()
+    tokens = []
+    for line in lines:
+        line = line.split()
+        if line[2] != 'sil':
+            tokens = [*tokens, ' ', line[2]]
+    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]
+
+def load_data(path: str):
+    path = bytes.decode(path.numpy())
+    file_name = path.split('/')[-1].split('.')[0]
+    # File name splitting for windows
+    file_name = path.split('\\')[-1].split('.')[0]
+    video_path = os.path.join('..', 'data', 's1', f'{file_name}.mpg')
+    alignment_path = os.path.join('..', 'data', 'alignments', 's1', f'{file_name}.align')
+    frames = load_video(video_path)
+    print(frames.shape)
+    alignments = load_alignments(alignment_path)
+    image_data = (frames * 255).astype(np.uint8)
+    image_data = np.squeeze(image_data)
+
+    return frames, alignments, image_data
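The two StringLookup layers defined at the top of utils.py form an inverse pair: char_to_num maps characters from the 39-symbol vocabulary to integer ids (index 0 is reserved for the out-of-vocabulary token), and num_to_char inverts that mapping for decoding. A small round-trip check, purely illustrative and not part of this commit:

# Illustrative round trip through the vocabulary mapping in utils.py.
import tensorflow as tf
from utils import char_to_num, num_to_char

chars = tf.strings.unicode_split("lip reading", input_encoding='UTF-8')
ids = char_to_num(chars)
print(ids.numpy())  # integer ids; 0 would mark an out-of-vocabulary character
text = tf.strings.reduce_join(num_to_char(ids)).numpy().decode('utf-8')
print(text)  # 'lip reading'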
models/checkpoint
ADDED
@@ -0,0 +1,2 @@
+model_checkpoint_path: "checkpoint"
+all_model_checkpoint_paths: "checkpoint"
models/checkpoint.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e34a286a2b08711556b6e2dbbf1f21c41ea9aacdd7405762d17bce4f911b9c63
+size 101674367
models/checkpoint.index
ADDED
Binary file (4.8 kB)
requirements.txt
ADDED
Binary file (2.91 kB)