File size: 2,878 Bytes
bdb5b58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
import os

# Set up OpenAI API key
# SECURITY: a secret key must never be committed to source control. The key is
# read from the OPENAI_API_KEY environment variable instead. Any key that was
# previously hard-coded in this file is compromised and must be revoked and
# rotated in the OpenAI dashboard.
def load_openai_api_key(environ=None):
    """Return the OpenAI API key stored in *environ*, or "" when it is absent.

    Args:
        environ: Mapping to read ``OPENAI_API_KEY`` from. Defaults to
            ``os.environ`` when omitted.

    Returns:
        The key with surrounding whitespace removed, or an empty string when
        the variable is missing, ``None`` or blank.
    """
    source = os.environ if environ is None else environ
    return (source.get("OPENAI_API_KEY") or "").strip()


OPENAI_API_KEY = load_openai_api_key()
if OPENAI_API_KEY:
    # Write the whitespace-stripped value back so code that reads the
    # environment variable directly sees the same key as this constant.
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

def train_model_with_transcript(transcript):
    """Build a FAISS vector store from the transcript text.

    Despite the name, no model is trained here: the transcript is split into
    chunks, the chunks are embedded with ``OpenAIEmbeddings`` and the result
    is indexed with ``FAISS.from_texts``.

    Args:
        transcript: Full transcript text to index.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If the transcript is empty, whitespace-only, or yields no
            chunks after splitting.
    """
    # Fail early with a clear message instead of sending an empty batch to the
    # embedding/indexing calls below.
    if not transcript or not transcript.strip():
        raise ValueError("Transcript is empty; there is nothing to index.")

    # Split transcript into smaller chunks
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = splitter.split_text(transcript)
    if not docs:
        raise ValueError("Transcript produced no text chunks to index.")

    # Create embeddings and vector store
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(docs, embeddings)

def generate_similar_content(query, vectorstore):
    """Generate text resembling *query*, using passages retrieved from *vectorstore*.

    Args:
        query: Input text to find related passages for and to imitate.
        vectorstore: Object exposing ``as_retriever()``; the retriever's
            ``get_relevant_documents`` results supply the prompt context.

    Returns:
        Whatever ``LLMChain.run`` returns for the filled-in prompt.
    """
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo")
    doc_retriever = vectorstore.as_retriever()
    template_text = """
    Use the context below to generate content similar to the provided input:

    Context: {context}
    Input Query: {query}

    Similar Content:
    """
    generation_chain = LLMChain(
        llm=chat_model,
        prompt=PromptTemplate(
            input_variables=["context", "query"],
            template=template_text,
        ),
    )

    # Fetch the passages related to the query and flatten them into a single
    # space-separated context string for the prompt.
    matches = doc_retriever.get_relevant_documents(query)
    joined_context = " ".join(match.page_content for match in matches)

    return generation_chain.run({"context": joined_context, "query": query})

# Streamlit app UI
#
# Streamlit re-executes this whole script on every widget interaction, so a
# plain local variable does not survive from the "Train Model" click to the
# "Generate Content" click. The vector store is therefore kept in
# st.session_state, which persists across reruns of the same browser session.
st.title("Text-based Content Generator")
st.markdown("Upload a transcription file, train the model, and generate similar content.")

uploaded_file = st.file_uploader("Upload Transcription File (TXT):", type=["txt"])

if uploaded_file:
    transcription = None
    with st.spinner("Reading transcription file..."):
        try:
            # getvalue() returns the full buffer regardless of the current
            # stream position, so it is safe to call again on every rerun.
            transcription = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.error("Could not read the file: it is not valid UTF-8 text.")

    if transcription is not None:
        if not transcription.strip():
            st.error("The uploaded transcription file is empty.")
        else:
            st.success("Transcription file loaded successfully!")

            if st.button("Train Model"):
                with st.spinner("Training model..."):
                    st.session_state["vectorstore"] = train_model_with_transcript(transcription)
                st.success("Model trained successfully!")

query = st.text_input("Enter your query to generate similar content:")

if st.button("Generate Content"):
    vectorstore = st.session_state.get("vectorstore")
    if vectorstore is None:
        st.error("Please train the model first by uploading a transcription file.")
    elif not query.strip():
        st.warning("Please enter a query before generating content.")
    else:
        with st.spinner("Generating content..."):
            result = generate_similar_content(query, vectorstore)
        st.success("Content generated successfully!")
        st.text_area("Generated Content", value=result, height=200)