import requests
import shutil
import os
import re
import datetime
import torch
import soundfile as sf

from components.custom_llm import custom_chain

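# Illustrative sketch (not called anywhere in this module): generate_videos() below
# expects custom_chain().invoke(text) to return text shaped like "keyword - sentence"
# pairs, which it reduces to bare search keywords. The sample string here is hypothetical.
def _example_keyword_parsing():
    llm_output = "nature - The forest is calm. city - The streets are busy."
    keywords = [x.split('-')[0].strip()
                for x in filter(lambda x: '-' in x, re.split(r'[^A-Za-z -]', llm_output))]
    return keywords  # e.g. ['nature', 'city']
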
# Search Pexels for videos (or photos, depending on `endpoint`) matching a keyword
def search_pexels(keyword, api_key, orientation='portrait', size='medium', endpoint='videos', num_pages=50, pages=4):
    # Accept the old misspelling 'potrait' so existing callers keep working
    if orientation == 'potrait':
        orientation = 'portrait'
    if orientation not in ['portrait', 'landscape', 'square']:
        raise ValueError("orientation must be one of {'square', 'landscape', 'portrait'}")
    if size not in ['medium', 'small', 'large']:
        raise ValueError("size must be one of ['medium', 'small', 'large']")

    base_url = 'https://api.pexels.com/'
    headers = {'Authorization': api_key}
    # `num_pages` is the number of results per page, `pages` is the page index
    url = f'{base_url}{endpoint}/search?query={keyword}&per_page={num_pages}&orientation={orientation}&page={pages}'
    response = requests.get(url, headers=headers)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        return response.json()
    else:
        print(f'Error: {response.status_code}')
        return None

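# Illustrative usage sketch (not executed on import). "YOUR_PEXELS_API_KEY" is a
# placeholder; the real key is supplied by the caller of generate_videos().
def _example_search_pexels():
    data = search_pexels('sunset beach', 'YOUR_PEXELS_API_KEY', orientation='landscape')
    if data is not None:
        # The Pexels response contains a 'videos' list; each entry carries an 'id'
        # and a 'video_files' list with per-resolution download links.
        for video in data.get('videos', []):
            print(video['id'], len(video['video_files']))
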
# Download the first not-yet-used video whose resolution matches the requested height/width
def download_video(data, parent_path, height, width, links, i):
    for x in data['videos']:
        if x['id'] in links:
            continue
        vid = x['video_files']
        # Log the available renditions for debugging
        print(vid)
        for v in vid:
            if v['height'] == height and v['width'] == width:
                out_path = os.path.join(parent_path, f"{i}_{v['id']}.mp4")
                with open(out_path, 'wb') as f:
                    f.write(requests.get(v['link']).content)
                print("Successfully saved video in", out_path)
                return x['id']

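# Illustrative usage sketch (not executed on import): search, then download one matching
# clip into a hypothetical './clips' directory. The API key and resolution are placeholders.
def _example_download_video():
    os.makedirs('clips', exist_ok=True)
    data = search_pexels('mountain', 'YOUR_PEXELS_API_KEY', orientation='landscape')
    used_ids = []
    if data is not None:
        video_id = download_video(data, 'clips', height=1080, width=1920, links=used_ids, i=0)
        used_ids.append(video_id)
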
# Convert a list of sentences to speech, routing each sentence to the Indonesian or
# non-Indonesian TTS model based on the text-classification pipeline's label
def generate_voice(text, model, tokenizer, model2, tokenizer2, text_cls):
    speeches = []

    for x in text:
        x = x + "."
        if text_cls(x)[0]['label'][:4] == 'Indo':
            inputs = tokenizer(x, return_tensors="pt")
            with torch.no_grad():
                output = model(**inputs).waveform
            speeches.append(output)
        else:
            inputs = tokenizer2(x, return_tensors="pt")
            with torch.no_grad():
                output = model2(**inputs).waveform
            speeches.append(output)

    # Duration of each clip in seconds (waveforms have shape (1, num_samples) at 16500 Hz)
    return speeches, [x.shape[-1] / 16500 for x in speeches]

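# Sketch of how the TTS models and language classifier passed into generate_voice()
# might be loaded. The checkpoint names below are assumptions (the real models are
# loaded outside this module); any VITS-style model exposing `.waveform` and a
# text-classification pipeline whose labels start with 'Indo' for Indonesian would fit.
def _example_load_voice_models():
    from transformers import VitsModel, AutoTokenizer, pipeline

    model = VitsModel.from_pretrained("facebook/mms-tts-ind")         # assumed Indonesian TTS
    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-ind")
    model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")        # assumed non-Indonesian TTS
    tokenizer2 = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
    # Hypothetical language-ID model; its labels must start with 'Indo' for Indonesian text
    text_cls = pipeline("text-classification", model="your-language-id-model")
    return model, tokenizer, model2, tokenizer2, text_cls
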
# Use the LLM to pick a search keyword per sentence, download a matching Pexels clip
# for each, and synthesize the narration audio
def generate_videos(text, api_key, orientation, height, width, model, tokenizer, model2, tokenizer2, text_cls):
    links = []
    di, sentences, length_speech = None, [], []
    try:
        # Ask the LLM for "keyword - sentence" pairs and keep only the keyword part
        sentences = [x.split('-')[0].strip() for x in filter(lambda x: '-' in x, re.split(r'[^A-Za-z -]', custom_chain().invoke(text)))]

        # Create a fresh working directory named after the current timestamp
        di = str(datetime.datetime.now())
        if os.path.exists(di):
            shutil.rmtree(di)
        os.mkdir(di)

        # Download one video per keyword
        print("Keyword :")
        for i, s in enumerate(sentences):
            if s == '':
                s = 'videos'
            print(i + 1, ":", s)
            data = search_pexels(s, api_key, orientation.lower())
            link = download_video(data, di, height, width, links, i)
            links.append(link)

        # Split the original paragraph into sentences and synthesize the voice-over
        sentences = list(filter(None, [x.strip() for x in re.split(r'[^A-Za-z0-9 -]', text)]))
        speeches, length_speech = generate_voice(sentences, model, tokenizer, model2, tokenizer2, text_cls)
        sf.write("x.wav", torch.cat(speeches, 1)[0], 16500)

        print("Success! Videos have been generated")
    except Exception as e:
        print("Error! Failed generating videos")
        print(e)

    return di, sentences, length_speech
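
# Illustrative end-to-end sketch (not executed on import). The API key, resolution and
# input paragraph are placeholders; the models come from a loader such as the
# hypothetical _example_load_voice_models() above.
def _example_generate_videos():
    model, tokenizer, model2, tokenizer2, text_cls = _example_load_voice_models()
    folder, sentences, durations = generate_videos(
        text="The beach was quiet in the morning. Waves rolled in slowly.",
        api_key="YOUR_PEXELS_API_KEY",
        orientation="Landscape",
        height=1080,
        width=1920,
        model=model, tokenizer=tokenizer,
        model2=model2, tokenizer2=tokenizer2,
        text_cls=text_cls,
    )
    print(folder, sentences, durations)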