File size: 4,270 Bytes
21215cb
ceac115
21215cb
eff85b8
 
21215cb
 
 
 
eff85b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

# copyright Guy Giesbrecht and Andy Walters January, 2024
#

import os
# Install the runtime dependencies from inside the script, before the
# third-party imports below are attempted. Exit codes are not checked.
for _pip_command in (
    'pip install -U openai-whisper',
    'pip install -U git+https://github.com/linto-ai/whisper-timestamped',
    'pip install gradio moviepy whisper-timestamped',
):
    os.system(_pip_command)

import datetime as dt
import json
import random
import shutil

import whisper_timestamped as whisper
from moviepy.video.io.VideoFileClip import VideoFileClip
import gradio as gr

# Helper functions and global variables
# Start-up output directory, named after the current time at minute
# resolution (YYYYMMDDHHMM).
outdir = dt.datetime.now().strftime("%Y%m%d%H%M")
try:
    # Create first and react to the collision, rather than checking
    # os.path.exists() beforehand, so there is no window between the check
    # and the creation.
    os.mkdir(outdir)
except FileExistsError:
    # Something with this minute's name already exists: append four random
    # digits, retrying in the unlikely case that name is taken as well.
    while True:
        new_outdir = outdir + str(random.randint(1000, 9999))
        try:
            os.mkdir(new_outdir)
        except FileExistsError:
            continue
        break
    outdir = new_outdir
    print("Created new output directory:", new_outdir)
print("date time now:" + outdir)

# Load the "base" model once at import time; generate_timestamps() reuses
# this module-level instance for every transcription.
model = whisper.load_model("base")

def generate_timestamps(vidname):
    """Transcribe the audio track of ``vidname`` with the module-level model.

    The audio is loaded with ``whisper.load_audio`` and transcribed with the
    language fixed to English.

    Args:
        vidname: Path of the media file to transcribe.

    Returns:
        The result object produced by ``whisper.transcribe`` (callers in this
        file read its ``"segments"`` entry).
    """
    return whisper.transcribe(model, whisper.load_audio(vidname), language="en")

def get_segment_info(data):
    """Reduce a transcription result to its essential per-segment fields.

    Args:
        data: Mapping with an optional ``"segments"`` list of segment mappings.

    Returns:
        A list of new dicts holding only ``id``, ``start``, ``end`` and
        ``text`` for every segment that has all four keys; segments missing
        any of them are skipped. An absent ``"segments"`` key yields ``[]``.
    """
    wanted_keys = ("id", "start", "end", "text")
    return [
        {key: segment[key] for key in wanted_keys}
        for segment in data.get("segments", [])
        if all(key in segment for key in wanted_keys)
    ]

def combine_entries(entries, max_duration=30):
    """Merge consecutive transcript segments into longer chunks.

    Segments are accumulated in order into the current chunk for as long as
    the sum of their individual durations stays within ``max_duration``.
    The segment that would exceed the budget closes the current chunk and
    starts the next one. Only segment durations are summed; silent gaps
    between segments do not count towards the budget.

    A closed chunk keeps the ``end`` of its own last segment. It is not
    stretched to the end of the segment that opens the next chunk, because
    that would make consecutive clips overlap and leave a clip longer than
    the text recorded for it.

    Args:
        entries: Iterable of mappings with ``start``, ``end`` and ``text``.
        max_duration: Budget of summed segment durations per chunk, in the
            same unit as ``start``/``end``. Defaults to 30.

    Returns:
        A list of new ``{"start", "end", "text"}`` dicts; the input entries
        are not modified. A single segment longer than ``max_duration``
        becomes a chunk of its own.
    """
    combined_entries = []
    current_entry = None
    total_duration = 0

    for entry in entries:
        entry_duration = entry["end"] - entry["start"]

        # Still room in the open chunk: extend it and move on.
        if current_entry is not None and total_duration + entry_duration <= max_duration:
            current_entry["end"] = entry["end"]
            current_entry["text"] += " " + entry["text"]
            total_duration += entry_duration
            continue

        # Either there is no open chunk yet or this segment would overflow
        # it: flush the open chunk unchanged and start a new one here.
        if current_entry is not None:
            combined_entries.append(current_entry)
        current_entry = {
            "start": entry["start"],
            "end": entry["end"],
            "text": entry["text"],
        }
        total_duration = entry_duration

    if current_entry is not None:
        combined_entries.append(current_entry)

    return combined_entries

def extract_video_segment(input_video, output_video, start_time, end_time):
    """Cut ``[start_time, end_time]`` out of a video and write it to a file.

    Args:
        input_video: Path of the source video.
        output_video: Path of the clip to write (H.264 video, AAC audio).
        start_time: Start of the segment, passed to ``subclip``.
        end_time: End of the segment, passed to ``subclip``.
    """
    source_clip = VideoFileClip(input_video)
    try:
        segment_clip = source_clip.subclip(start_time, end_time)
        segment_clip.write_videofile(output_video, codec="libx264", audio_codec="aac")
    finally:
        # Close the clip even when cutting or encoding fails, so that a
        # failed segment does not leave the source file open.
        source_clip.close()

def save_segments(outdir, name, combined_entries):
    """Write one clip per combined entry into ``outdir``.

    Clips are named ``output_segment_<n>.mp4`` with ``n`` counting from 1 in
    the order of ``combined_entries``.

    Args:
        outdir: Directory that receives the clips.
        name: Path of the source video.
        combined_entries: Sequence of mappings with ``start`` and ``end``.
    """
    for clip_number, entry in enumerate(combined_entries, start=1):
        clip_start, clip_end = entry['start'], entry['end']
        target_path = f'{outdir}/output_segment_{clip_number}.mp4'
        extract_video_segment(name, target_path, clip_start, clip_end)

def split_up_video(video_path, output_dir):
    """Transcribe a video, cut it into clips and zip the results.

    Writes ``transcript.txt`` (the combined entries as JSON) and the clips
    into ``output_dir``, then archives that directory next to the source
    video as ``<video path without extension>.zip``.

    Args:
        video_path: Path of the video to process.
        output_dir: Existing directory that receives the transcript and the
            clips. It is archived relative to the current working directory.

    Returns:
        Path of the created zip archive.
    """
    result = generate_timestamps(video_path)
    combined_entries = combine_entries(get_segment_info(result))

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding is fixed to UTF-8 instead of the platform default.
    transcript_path = os.path.join(output_dir, "transcript.txt")
    with open(transcript_path, "w", encoding="utf-8") as scribeout:
        json.dump(combined_entries, scribeout, indent=2, ensure_ascii=False)

    save_segments(output_dir, video_path, combined_entries)

    filename, _extension = os.path.splitext(video_path)
    # Build the archive in-process rather than through a shell command: the
    # path comes from an uploaded file and may contain spaces or shell
    # metacharacters, and no external `zip` binary is needed this way.
    shutil.make_archive(filename, "zip", base_dir=output_dir)

    return f"{filename}.zip"

# Gradio interface
def process_video(video):
    """Gradio callback: split the uploaded video and return the zip path.

    Args:
        video: The uploaded file. With ``gr.File(type="filepath")`` this is
            a path string; objects exposing the path as ``.name`` are
            accepted as well.

    Returns:
        Path of the zip archive produced by ``split_up_video``.
    """
    if isinstance(video, (str, os.PathLike)):
        video_path = video
    else:
        video_path = video.name

    # The directory name only has minute resolution, so it can already
    # exist (from start-up or from another upload in the same minute).
    # Add a numeric suffix until the creation succeeds.
    stamp = dt.datetime.now().strftime("%Y%m%d%H%M")
    output_dir = stamp
    attempt = 0
    while True:
        try:
            os.mkdir(output_dir)
            break
        except FileExistsError:
            attempt += 1
            output_dir = f"{stamp}_{attempt}"

    return split_up_video(video_path, output_dir)

# Single-file upload in, downloadable zip out.
video_upload = gr.File(file_count="single", type="filepath", label="Upload a Video")
zip_download = gr.File(label="Download Zipped Segments and Transcript")

iface = gr.Interface(
    fn=process_video,
    inputs=video_upload,
    outputs=zip_download,
    title="Video Splitter",
    description="Upload a video and get a zipped file with segmented videos and a transcript.",
)

# Start the web UI.
iface.launch()