File size: 3,881 Bytes
1fc91f5
 
 
 
 
 
 
 
dda0b8c
1fc91f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dda0b8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fc91f5
dda0b8c
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Import the required libraries
from youtube_transcript_api import YouTubeTranscriptApi
import re
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import AwaEmbeddings
import os
import openai
import streamlit as st


def get_transcript(url):
    """Return the English transcript text of a YouTube video.

    Args:
        url: A YouTube video URL, either in the ``watch?v=<id>`` form or as
            a ``youtu.be/<id>`` short link.

    Returns:
        The text of every transcript snippet joined with single spaces.

    Raises:
        ValueError: If no video id can be found in ``url``, or if the video
            has no transcript that is English or translatable to English.
    """
    # `url or ""` keeps a missing value from raising TypeError inside `re`.
    match = re.search(r"(?:v=|youtu\.be/)([^&#?/]+)", url or "")
    if match is None:
        raise ValueError(f"Could not find a YouTube video id in URL: {url!r}")
    video_id = match.group(1)

    # Retrieve the available transcripts.
    transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))

    # Prefer a transcript that is already English; otherwise translate the
    # first translatable one. Only a single transcript is ever fetched.
    english = next(
        (t for t in transcripts if t.language_code.split("-")[0] == "en"),
        None,
    )
    if english is not None:
        subtitles = english.fetch()
    else:
        translatable = next((t for t in transcripts if t.is_translatable), None)
        if translatable is None:
            raise ValueError(
                f"No English or translatable transcript found for video {video_id!r}"
            )
        subtitles = translatable.translate('en').fetch()

    # Concatenate the snippet texts into one string.
    return ' '.join(sub['text'] for sub in subtitles)


# Embedding model handed to FAISS when the transcript index is built.
embeddings = AwaEmbeddings()

# Splitter used to cut the transcript into overlapping chunks; sizes are
# measured with `len`, i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

def chat(question):
    """Answer ``question`` using the transcript index stored in the session.

    Runs a similarity search against ``st.session_state.database`` and sends
    the matching documents plus the question to the OpenAI chat completion
    endpoint.

    Args:
        question: The user's question about the video.

    Returns:
        The content of the first completion choice, or a fixed error string
        when no database has been stored in the session yet.
    """
    # Guard clause: nothing to search until a video has been indexed.
    if 'database' not in st.session_state:
        return "Error, not generated database"

    matches = st.session_state.database.similarity_search(question)

    system_message = {
        "role": "system",
        "content": """You are my Youtube Asisstant. I will pass you texts from a Youtube Video Transcrip and I need you to use them to answer my question from the Youtube Video.
        Please do not invent any information, and I am asking about information in the Youtube Video.""",
    }
    context_message = {"role": "user", "content": f"Context:{matches}"}
    question_message = {"role": "user", "content": f"Question:{question}"}

    # temperature=0 is passed through unchanged from the original call.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=[system_message, context_message, question_message],
        temperature=0,
    )

    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]


#------------------------------------------------------------- APP STREAMLIT--------------------------------------------------------------------

st.title("Ask Question Youtube Videos")

# Sidebar: first collect the OpenAI API key, then the video URL.
with st.sidebar:
    if "api" not in st.session_state:
        api_key = st.text_input(label="api", placeholder="API Key from OpenAI", label_visibility="hidden")
        if st.button(label="Save"):
            api_key = api_key.strip()
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key
                openai.api_key = api_key
                st.session_state['api'] = api_key
            else:
                # An empty key would only fail later, on the first question.
                st.warning("Please enter a non-empty OpenAI API key.")
    else:
        url = st.text_input(label="url", placeholder="Youtube Video URL", label_visibility="hidden")
        if st.button(label="Save"):
            try:
                info = get_transcript(url)
                texts = text_splitter.create_documents([info])
                database = FAISS.from_documents(texts, embeddings)
            except Exception as err:
                # UI boundary: report the failure instead of crashing the
                # page with a traceback, and leave the session untouched.
                st.error(f"Could not load the transcript for this video: {err}")
            else:
                # Store the URL only once the index exists, so the chat view
                # is never reachable without a matching database.
                st.session_state['database'] = database
                st.session_state['url'] = url

if "api" not in st.session_state:
    st.write("Please, introduce your OpenAI API key to ask questions to any YouTube Video")

elif 'url' not in st.session_state:
    st.write("Please, introduce link URL from the YouTube Video")

else:
    # Re-apply this session's key on every rerun: `openai.api_key` is a
    # module-level setting and is otherwise only assigned in the run where
    # the key was saved.
    openai.api_key = st.session_state['api']

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("What is up?"):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(prompt)
        # Get response from the custom chat function
        response = chat(prompt)
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})