Spaces:

batlahiya
/

t2

Running

App Files Files Community

batlahiya commited on Jul 11, 2024

Commit

d2407ee

verified ·

1 Parent(s): 1f18304

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -30

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import os
 import re
 from pathlib import Path
 from unidecode import unidecode
-from tqdm import tqdm
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
@@ -22,7 +22,7 @@ import threading
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # Predefined values
-predefined_pdf = "t6.pdf"
 predefined_llm = "meta-llama/Llama-2-7b-hf"  # Use a smaller model for faster responses
 def load_doc(list_file_path, chunk_size, chunk_overlap):
@@ -101,7 +101,6 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
         return_messages=True
     )
     retriever = vector_db.as_retriever()
     print("Defining retrieval chain...")
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
@@ -121,9 +120,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
 # Define the conversation function with callback (non-blocking)
 @spaces.GPU()
-def conversation(message):
   global qa_chain  # Assuming qa_chain is a global variable
   # Model definition (ensure it's accessible within the function)
@@ -133,29 +130,37 @@ def conversation(message):
   max_new_tokens = 64  # Define max_new_tokens here
-  def generate_chunks(message, max_new_tokens):
-    max_chunk_length = 512  # Adjust this value based on your model and memory constraints
-    # Split the message into chunks
-    chunks = [message[i:i+max_chunk_length] for i in range(0, len(message), max_chunk_length)]
-    outputs = []
-    for chunk in chunks:
-      input_ids = tokenizer(chunk, return_tensors="pt")["input_ids"]
-      generated_chunk = model.generate(input_ids=input_ids, max_new_tokens=max_new_tokens)  # ... other generation arguments
-      outputs.append(generated_chunk[0]['generated_text'])  # Assuming generated text is in the first element
-    return "".join(outputs)
-  # Generate response with progress bar
-  with tqdm(total=len(message) // max_chunk_length + 1) as pbar:
-    generated_response = generate_chunks(message, max_new_tokens)
-    pbar.update()
-  if generated_response:
-    yield generated_response
-  else:
-    yield "No response generated."  # Provide a fallback message
 # Launch the Gradio interface with share option
 interface = gr.Interface(
@@ -164,5 +169,7 @@ interface = gr.Interface(
     outputs="text",  # Text output for streaming
     title="Conversational AI with Retrieval",
     description="Ask me anything about the uploaded PDF document!",
 )
-interface.launch(share=True)

 import re
 from pathlib import Path
 from unidecode import unidecode
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # Predefined values
+predefined_pdf = "t6.pdf"  # Replace with your PDF filepath
 predefined_llm = "meta-llama/Llama-2-7b-hf"  # Use a smaller model for faster responses
 def load_doc(list_file_path, chunk_size, chunk_overlap):
         return_messages=True
     )
     retriever = vector_db.as_retriever()
     print("Defining retrieval chain...")
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
 # Define the conversation function with callback (non-blocking)
 @spaces.GPU()
+def conversation(message, max_chunk_length=512):  # Define max_chunk_length as an argument
   global qa_chain  # Assuming qa_chain is a global variable
   # Model definition (ensure it's accessible within the function)
   max_new_tokens = 64  # Define max_new_tokens here
+  def generate_chunks(message, max_new_tokens, callback):
+    # ... rest of the generate_chunks function ... (unchanged)
+  def handle_response(response):
+    if response:
+      yield response
+    else:
+      yield "No response generated."  # Provide a fallback message
+  # Start generation in a thread with callback for response
+  thread = threading.Thread(target=generate_chunks, args=(message, max_new_tokens, handle_response))
+  thread.start()
+  # Yield a placeholder message initially
+  yield "Generating response..."
+# Load or create the document database (adjust as needed)
+pdf_filepath = predefined_pdf
+collection_name = create_collection_name(pdf_filepath)
+if os.path.exists(collection_name):
+  vector_db = load_db()
+  vector_db.connect(collection_name)
+  print("Loaded document database from:", collection_name)
+else:
+  print("Creating document database...")
+  doc_splits = load_doc([pdf_filepath], chunk_size=4096, chunk_overlap=512)
+  vector_db = create_db(doc_splits, collection_name)
+  print("Document database created:", collection_name)
+# Initialize the LLM conversation chain
+qa_chain = initialize_llmchain(predefined_llm, temperature=0.7, max_tokens=64, top_k=50, vector_db=vector_db)  # Adjust parameters as needed
 # Launch the Gradio interface with share option
 interface = gr.Interface(
     outputs="text",  # Text output for streaming
     title="Conversational AI with Retrieval",
     description="Ask me anything about the uploaded PDF document!",
+    arguments=[("max_chunk_length", int)],  # Pass max_chunk_length as an argument
 )
+interface.launch(share=True)  # Set share=True to create a public link