vincentmin committed on
Commit
bb7257e
·
1 Parent(s): 3cfcfcb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import date, timedelta

import gradio as gr
from langchain.document_loaders import ArxivLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
6
+
7
def get_data(user_query: str, load_max_docs: int = 5, chunk_size: int = 1000) -> str:
    """Return recent arXiv hep-th text chunks most relevant to *user_query*.

    Queries arXiv for papers in category ``hep-th`` whose submission date
    falls between two days ago and today, splits the loaded documents into
    chunks, indexes the chunks in an in-memory FAISS store using
    ``HuggingFaceEmbeddings`` with its default settings, and retrieves the
    chunks that best match the query.

    Args:
        user_query: Free-text query used for the similarity search.
        load_max_docs: Passed to ``ArxivLoader`` as the maximum number of
            documents to load.
        chunk_size: Passed to ``RecursiveCharacterTextSplitter`` as the
            chunk size.

    Returns:
        The ``page_content`` of every retrieved chunk, joined by blank
        lines. An empty string if arXiv returned no documents or the
        retriever found nothing.
    """
    # Take "today" once so both ends of the window share the same reference
    # day even if the call straddles midnight.
    today = date.today()
    min_date = (today - timedelta(days=2)).strftime('%Y%m%d')
    max_date = today.strftime('%Y%m%d')

    # min_date / max_date are already formatted strings; interpolate them
    # directly (calling .strftime on a str raises AttributeError).
    query = f"cat:hep-th AND submittedDate:[{min_date} TO {max_date}]"

    loader = ArxivLoader(query=query, load_max_docs=load_max_docs)
    documents = loader.load()
    if not documents:
        # Nothing was submitted in the window (or nothing could be loaded);
        # building a vector index from zero documents would fail.
        return ""

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size)
    texts = text_splitter.split_documents(documents)
    if not texts:
        return ""

    embeddings = HuggingFaceEmbeddings()
    db = FAISS.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    docs = retriever.get_relevant_documents(user_query)
    if not docs:
        return ""

    # Debug aid: show where the top-ranked chunk came from.
    print(docs[0].metadata)
    return "\n\n".join(d.page_content for d in docs)
21
+
22
# Gradio UI: one text box in, one text box out, backed by get_data.
# Only the query text is supplied by the UI, so get_data's other
# parameters keep their default values.
# Requires `import gradio as gr` in the file's import block.
demo = gr.Interface(
    fn=get_data,
    inputs="text",
    outputs="text",
    title="Document Filter",
    description="Enter a query to filter the list of documents.",
)

# Enable request queuing, then start the web server.
demo.queue().launch()