Spaces:
Runtime error
Runtime error
Delete app
Browse files
- app/animation.gif +0 -0
- app/modelutil.py +0 -34
- app/streamlitapp.py +0 -60
- app/test_video.mp4 +0 -0
- app/utils.py +0 -52
app/animation.gif
DELETED
Binary file (445 kB)
|
|
app/modelutil.py
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
from tensorflow.python.ops.numpy_ops import np_config
|
2 |
-
np_config.enable_numpy_behavior()
|
3 |
-
import os
|
4 |
-
from tensorflow.keras.models import Sequential
|
5 |
-
from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
|
6 |
-
|
7 |
-
def load_model() -> Sequential:
    """Assemble the lip-reading network and restore its trained weights.

    The stack is three Conv3D/ReLU/MaxPool3D stages, a per-timestep flatten,
    two bidirectional LSTM layers each followed by dropout, and a softmax
    Dense head. Weights are read from ``../models/checkpoint``, resolved
    against the current working directory.

    Returns:
        The ``Sequential`` model with weights loaded.
    """
    net = Sequential()

    # Layers in the exact order they are stacked.
    # NOTE(review): input_shape presumably means 75 frames of 46x140
    # single-channel crops, and the 41 output units presumably cover the
    # character vocabulary plus a CTC blank - confirm against utils.py.
    layer_stack = [
        Conv3D(128, 3, input_shape=(75,46,140,1), padding='same'),
        Activation('relu'),
        MaxPool3D((1,2,2)),

        Conv3D(256, 3, padding='same'),
        Activation('relu'),
        MaxPool3D((1,2,2)),

        Conv3D(75, 3, padding='same'),
        Activation('relu'),
        MaxPool3D((1,2,2)),

        TimeDistributed(Flatten()),

        Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)),
        Dropout(.5),

        Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)),
        Dropout(.5),

        Dense(41, kernel_initializer='he_normal', activation='softmax'),
    ]
    for layer in layer_stack:
        net.add(layer)

    weights_path = os.path.join('..','models','checkpoint')
    net.load_weights(weights_path)

    return net
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/streamlitapp.py
DELETED
@@ -1,60 +0,0 @@
|
|
1 |
-
# Import all of the dependencies
import os
import subprocess

import imageio
import numpy as np
import streamlit as st
import tensorflow as tf

from utils import load_data, num_to_char
from modelutil import load_model
|
10 |
-
|
11 |
-
|
12 |
-
# Set the layout to the streamlit app as wide
st.set_page_config(layout='wide')

# Setup the sidebar
with st.sidebar:
    st.image('https://plus.unsplash.com/premium_photo-1682309676673-392c56015c5c?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1000&q=80')
    st.title('Lip Reading')
    st.info('This application is originally developed from the LipNet deep learning model.')

st.title('LipNet using StreamLit ✌🏻')
# Generating a list of options or videos
options = os.listdir(os.path.join('..', 'data', 's1'))
selected_video = st.selectbox('Choose video', options)

# Generate two columns
col1, col2 = st.columns(2)

if options:

    # Rendering the video
    with col1:
        st.info('The video below displays the converted video in mp4 format')
        file_path = os.path.join('..','data','s1', selected_video)
        # Pass ffmpeg an argument list instead of a shell string so a file
        # name containing spaces or shell metacharacters is handled as one
        # literal argument. The exit status is ignored, as it was before.
        try:
            subprocess.run(
                ['ffmpeg', '-i', file_path, '-vcodec', 'libx264', 'test_video.mp4', '-y'],
                check=False,
            )
        except OSError:
            # ffmpeg is missing or not executable; keep going like the old
            # os.system call did, but tell the user why the clip may be stale.
            st.warning('ffmpeg could not be started, so the video was not re-encoded')

        # Rendering inside of the app
        with open('test_video.mp4', 'rb') as video_file:
            video_bytes = video_file.read()
        st.video(video_bytes)

    with col2:
        st.info('👀 This is all the machine learning model sees when making a prediction')
        video, annotations,image_data = load_data(tf.convert_to_tensor(file_path))
        # Scale the normalised frames into a visible uint8 range for the GIF.
        imageio.mimsave('animation.gif',np.squeeze((video * 50).astype(np.uint8)) , duration=100)
        st.image('animation.gif', width=400)

        st.info('This is the output of the machine learning model as tokens')
        model = load_model()
        yhat = model.predict(tf.expand_dims(video, axis=0))
        # Greedy CTC decoding over a fixed sequence length of 75.
        decoder = tf.keras.backend.ctc_decode(yhat, [75], greedy=True)[0][0].numpy()
        st.text(decoder)

        # Convert prediction to text
        st.info('Decode the raw tokens into words')
        converted_prediction = tf.strings.reduce_join(num_to_char(decoder)).numpy().decode('utf-8')
        st.text(converted_prediction)
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/test_video.mp4
DELETED
Binary file (110 kB)
|
|
app/utils.py
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
import tensorflow as tf
|
2 |
-
from typing import List
|
3 |
-
import numpy as np
|
4 |
-
import cv2
|
5 |
-
import os
|
6 |
-
|
7 |
-
# Characters the lookups know about; list order fixes the ids assigned below.
vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")
# Mapping characters to integers
char_to_num = tf.keras.layers.StringLookup(oov_token="", vocabulary=vocab)
# Mapping integers back to original characters
num_to_char = tf.keras.layers.StringLookup(
    invert=True,
    oov_token="",
    vocabulary=char_to_num.get_vocabulary(),
)
|
13 |
-
|
14 |
-
def load_video(path:str) -> List[float]:
    """Read a video and return its cropped, normalised grayscale frames.

    Each decoded frame goes through ``tf.image.rgb_to_grayscale`` and is
    cropped to rows 190-235 and columns 80-219. The collected frames are then
    shifted by their mean and divided by their standard deviation.

    Args:
        path: Location of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        A float32 tensor of normalised frames. The ``List[float]`` annotation
        is kept unchanged for backward compatibility.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
            ret, frame = cap.read()
            if not ret:
                # A failed read returns no image; stop here instead of
                # passing None to rgb_to_grayscale. The container's reported
                # frame count may not match what can actually be decoded.
                break
            frame = tf.image.rgb_to_grayscale(frame)
            frames.append(frame[190:236,80:220,:])
    finally:
        # Release the capture handle even if decoding or conversion raised.
        cap.release()

    # NOTE(review): the mean is taken before the float cast, exactly as the
    # original code did; changing that would alter the values fed to the
    # model, so confirm before touching it.
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return tf.cast((frames - mean), tf.float32) / std
|
27 |
-
|
28 |
-
def load_alignments(path:str) -> List[str]:
    """Read an alignment file and encode its spoken words as character ids.

    Each line is split on whitespace and its third field is taken as the
    word. Lines whose word is ``'sil'`` are skipped, as are blank or short
    lines that have no third field. The remaining words are joined with a
    single space between them, split into characters and mapped through
    ``char_to_num``.

    Args:
        path: Location of the ``.align`` text file.

    Returns:
        The ``char_to_num`` output for the character sequence, without the
        leading separator space. The ``List[str]`` annotation is kept
        unchanged for backward compatibility.
    """
    tokens = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or truncated lines have no word column; indexing them
            # used to raise IndexError.
            if len(fields) < 3 or fields[2] == 'sil':
                continue
            # Every word is preceded by a space; the first one is dropped
            # by the [1:] slice below.
            tokens.extend((' ', fields[2]))
    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]
|
38 |
-
|
39 |
-
def load_data(path: str):
    """Load the frames and alignment tokens that belong to one video path.

    Only the base name of ``path`` (up to its first dot) is used; the video
    and alignment files are then looked up under ``../data/s1`` and
    ``../data/alignments/s1`` relative to the working directory.

    Args:
        path: Object whose ``.numpy()`` returns the path as bytes (the caller
            passes a string tensor). The ``str`` annotation is kept unchanged
            for backward compatibility.

    Returns:
        A tuple ``(frames, alignments, image_data)``: the output of
        ``load_video``, the output of ``load_alignments``, and the frames
        multiplied by 255, cast to uint8 and squeezed.
    """
    path = bytes.decode(path.numpy())
    # Accept both separators. Splitting on '\\' alone left a POSIX path such
    # as '../data/s1/x.mpg' whole, and the following split('.') then yielded
    # an empty file name.
    file_name = path.replace('\\', '/').split('/')[-1].split('.')[0]
    video_path = os.path.join('..','data','s1',f'{file_name}.mpg')
    alignment_path = os.path.join('..','data','alignments','s1',f'{file_name}.align')
    frames = load_video(video_path)
    print(frames.shape)
    alignments = load_alignments(alignment_path)
    # NOTE(review): .astype on the load_video result presumably relies on
    # TensorFlow's numpy behaviour being enabled elsewhere - confirm.
    image_data = (frames * 255).astype(np.uint8)
    image_data = np.squeeze(image_data)

    return frames, alignments, image_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|