Spaces:

gneya
/

youtube_video_summarizer

Sleeping

App Files Files Community

gneya committed on Jun 2, 2024

Commit

71ab5da

verified ·

1 Parent(s): 9752c7d

Upload utils.py

Browse files

Files changed (1) hide show

utils.py +68 -0

utils.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import yt_dlp
+from langchain import OpenAI, LLMChain
+from langchain.chains.mapreduce import MapReduceChain
+from langchain.prompts import PromptTemplate
+from langchain.chains.summarize import load_summarize_chain
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.docstore.document import Document
+import whisper
+import textwrap
+import streamlit as st
+load_dotenv()
+async def download_mp4_from_youtube(url):
+    st.write("Downloading..........")
+    # Set the options for the download
+    filename = 'abc.mp4'
+    ydl_opts = {
+        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
+        'outtmpl': filename,
+        'quiet': True,
+    }
+    # Download the video file
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        result = ydl.extract_info(url, download=True)
+    print(transcribe())
+def transcribe():
+    st.write("Transcribing.....")
+    model = whisper.load_model("base")
+    result = model.transcribe("abc.mp4")
+    with open ('text.txt', 'w') as file:
+            file.write(result['text'])
+def create_llm():
+    st.write("Summarizing.....")
+    llm = ChatGroq()
+    text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1000, chunk_overlap=0, separators=[" ", ",", "\n"])
+    with open('text.txt') as f:
+        text = f.read()
+    texts = text_splitter.split_text(text)
+    docs = [Document(page_content=t) for t in texts[:4]]
+    prompt_template = """Write a concise bullet point summary of the following:
+    {text}
+    CONSCISE SUMMARY IN BULLET POINTS:"""
+    BULLET_POINT_PROMPT = PromptTemplate(template=prompt_template,
+                        input_variables=["text"])
+    chain = load_summarize_chain(llm,
+                             chain_type="stuff",
+                             prompt=BULLET_POINT_PROMPT)
+    output_summary = chain.run(docs)
+    wrapped_text = textwrap.fill(output_summary,
+                             width=1000,
+                             break_long_words=False,
+                             replace_whitespace=False)
+    # print(wrapped_text)
+    st.write("Summary of your video:")
+    st.write(wrapped_text)
+    return wrapped_text