Spaces:
Sleeping
Sleeping
danicafisher
committed on
Commit
•
c2bccf7
1
Parent(s):
98b8df2
Update app.py
Browse files
Uses original model
app.py
CHANGED
@@ -1,23 +1,17 @@
|
|
1 |
import os
|
2 |
from langchain_community.document_loaders import PyMuPDFLoader
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
4 |
-
from langchain_qdrant import QdrantVectorStore
|
5 |
from langchain_community.vectorstores import Qdrant
|
6 |
from langchain.prompts import ChatPromptTemplate
|
7 |
from langchain_openai.chat_models import ChatOpenAI
|
8 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
9 |
from langchain.embeddings.base import Embeddings
|
10 |
from langchain_core.output_parsers import StrOutputParser
|
11 |
-
|
12 |
-
from qdrant_client import QdrantClient
|
13 |
-
from qdrant_client.http.models import Distance, VectorParams
|
14 |
from operator import itemgetter
|
15 |
import chainlit as cl
|
16 |
from sentence_transformers import SentenceTransformer
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
# Load all the documents in the directory
|
22 |
documents = []
|
23 |
directory = "data/"
|
@@ -46,7 +40,7 @@ recursive_text_splitter = RecursiveCharacterTextSplitter(
|
|
46 |
length_function=len,
|
47 |
is_separator_regex=False
|
48 |
)
|
49 |
-
#
|
50 |
|
51 |
|
52 |
class SentenceTransformerEmbeddings(Embeddings):
|
@@ -59,10 +53,12 @@ class SentenceTransformerEmbeddings(Embeddings):
|
|
59 |
def embed_query(self, text):
|
60 |
return self.model.encode(text)
|
61 |
|
62 |
-
# Use the wrapper class
|
63 |
model = SentenceTransformer("danicafisher/dfisher-sentence-transformer-fine-tuned2")
|
64 |
-
embedding = SentenceTransformerEmbeddings(model)
|
65 |
-
|
|
|
|
|
66 |
|
67 |
# Create the vector store
|
68 |
vectorstore = Qdrant.from_documents(
|
@@ -76,10 +72,8 @@ retriever = vectorstore.as_retriever()
|
|
76 |
llm = ChatOpenAI(model="gpt-4")
|
77 |
|
78 |
|
79 |
-
# @cl.cache_resource
|
80 |
@cl.on_chat_start
|
81 |
async def start_chat():
|
82 |
-
|
83 |
template = """
|
84 |
Use the provided context to answer the user's query.
|
85 |
You may not answer the user's query unless there is specific context in the following text.
|
|
|
1 |
import os
|
2 |
from langchain_community.document_loaders import PyMuPDFLoader
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
|
|
4 |
from langchain_community.vectorstores import Qdrant
|
5 |
from langchain.prompts import ChatPromptTemplate
|
6 |
from langchain_openai.chat_models import ChatOpenAI
|
7 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
8 |
from langchain.embeddings.base import Embeddings
|
9 |
from langchain_core.output_parsers import StrOutputParser
|
10 |
+
|
|
|
|
|
11 |
from operator import itemgetter
|
12 |
import chainlit as cl
|
13 |
from sentence_transformers import SentenceTransformer
|
14 |
|
|
|
|
|
|
|
15 |
# Load all the documents in the directory
|
16 |
documents = []
|
17 |
directory = "data/"
|
|
|
40 |
length_function=len,
|
41 |
is_separator_regex=False
|
42 |
)
|
43 |
+
#rag_documents = recursive_text_splitter.split_documents(documents)
|
44 |
|
45 |
|
46 |
class SentenceTransformerEmbeddings(Embeddings):
|
|
|
53 |
def embed_query(self, text):
|
54 |
return self.model.encode(text)
|
55 |
|
56 |
+
# Use the wrapper class for the fine-tuned model
|
57 |
model = SentenceTransformer("danicafisher/dfisher-sentence-transformer-fine-tuned2")
|
58 |
+
# embedding = SentenceTransformerEmbeddings(model)
|
59 |
+
|
60 |
+
# Non-fine-tuned model
|
61 |
+
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
|
62 |
|
63 |
# Create the vector store
|
64 |
vectorstore = Qdrant.from_documents(
|
|
|
72 |
llm = ChatOpenAI(model="gpt-4")
|
73 |
|
74 |
|
|
|
75 |
@cl.on_chat_start
|
76 |
async def start_chat():
|
|
|
77 |
template = """
|
78 |
Use the provided context to answer the user's query.
|
79 |
You may not answer the user's query unless there is specific context in the following text.
|