gneya committed on
Commit
71ab5da
·
verified ·
1 Parent(s): 9752c7d

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +68 -0
utils.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yt_dlp
2
+ from langchain import OpenAI, LLMChain
3
+ from langchain.chains.mapreduce import MapReduceChain
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain.chains.summarize import load_summarize_chain
6
+ from dotenv import load_dotenv
7
+ from langchain_groq import ChatGroq
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.docstore.document import Document
10
+ import whisper
11
+ import textwrap
12
+ import streamlit as st
13
+
14
+
15
+
16
+ load_dotenv()
17
+
18
+
19
async def download_mp4_from_youtube(url):
    """Download the video at *url* to ``abc.mp4`` and then transcribe it.

    Progress is reported through ``st.write``. Once the download has
    finished, ``transcribe()`` is called so the transcript file is
    produced as part of the same step.

    Note that this coroutine contains no ``await``: the download and the
    transcription both run synchronously inside it.

    Args:
        url: Address of the video; passed unchanged to ``yt_dlp``.
    """
    st.write("Downloading..........")
    filename = 'abc.mp4'
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': filename,
        'quiet': True,
        # The output name is fixed, so a file left over from a previous
        # run would otherwise make yt-dlp skip the download and the old
        # video would be transcribed again.
        'overwrites': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.extract_info(url, download=True)
    # The transcript is persisted by transcribe() itself; its return value
    # is not needed here.
    transcribe()
32
+
33
+
34
+
35
def transcribe(video_path="abc.mp4", transcript_path="text.txt",
               model_name="base"):
    """Transcribe a media file with Whisper and save the text to disk.

    Args:
        video_path: File handed to ``model.transcribe``. Defaults to the
            ``abc.mp4`` name used by the downloader in this module.
        transcript_path: Where the transcript text is written. Defaults to
            ``text.txt``, the file ``create_llm`` reads.
        model_name: Whisper model identifier passed to
            ``whisper.load_model``.

    Returns:
        The transcript text (the ``'text'`` entry of Whisper's result).
    """
    st.write("Transcribing.....")
    model = whisper.load_model(model_name)
    transcript = model.transcribe(video_path)['text']
    # NOTE(review): written with the platform default encoding, matching
    # how create_llm opens the file; change both together if an explicit
    # encoding is introduced.
    with open(transcript_path, 'w') as file:
        file.write(transcript)
    return transcript
41
+
42
+
43
def create_llm(transcript_path='text.txt', max_chunks=4):
    """Summarize the saved transcript as bullet points and display it.

    The transcript is read from *transcript_path*, split into chunks of at
    most 1000 characters, and the first *max_chunks* chunks are passed to
    a "stuff" summarize chain backed by ``ChatGroq``. The summary is shown
    with ``st.write`` and also returned.

    Args:
        transcript_path: Text file holding the transcript. Defaults to
            ``text.txt``, the file ``transcribe`` writes.
        max_chunks: Number of leading chunks to summarize; the rest of the
            transcript is ignored. ``None`` uses every chunk.

    Returns:
        The summary text after ``textwrap.fill``, or an empty string when
        the transcript contains no text.
    """
    st.write("Summarizing.....")
    with open(transcript_path) as f:
        text = f.read()

    # Nothing to summarize: do not send an empty document to the model.
    if not text.strip():
        st.write("The transcript is empty; nothing to summarize.")
        return ""

    llm = ChatGroq()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=0, separators=[" ", ",", "\n"])
    texts = text_splitter.split_text(text)
    # Only the leading chunks are summarized, so the single "stuff" prompt
    # stays bounded in size.
    docs = [Document(page_content=t) for t in texts[:max_chunks]]

    prompt_template = """Write a concise bullet point summary of the following:
{text}
CONSCISE SUMMARY IN BULLET POINTS:"""
    BULLET_POINT_PROMPT = PromptTemplate(template=prompt_template,
                                         input_variables=["text"])
    chain = load_summarize_chain(llm,
                                 chain_type="stuff",
                                 prompt=BULLET_POINT_PROMPT)
    output_summary = chain.run(docs)
    # replace_whitespace=False leaves the model's own line breaks in place.
    wrapped_text = textwrap.fill(output_summary,
                                 width=1000,
                                 break_long_words=False,
                                 replace_whitespace=False)
    st.write("Summary of your video:")
    st.write(wrapped_text)
    return wrapped_text