vincentmin committed
Commit · d3db9a3
Parent(s): 0268ea7
Update app.py
app.py
CHANGED
@@ -12,24 +12,17 @@ from langchain.schema import Document
 
 
 LOAD_MAX_DOCS = 100
-min_date = (date.today() - timedelta(days=3)).strftime('%Y%m%d')
-max_date = date.today().strftime('%Y%m%d')
-query = f"cat:hep-th AND submittedDate:[{min_date} TO {max_date}]"
-loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
-
 # CHUNK_SIZE = 1000
 # text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)
 
 embeddings = HuggingFaceEmbeddings()
 
-
 document_prompt = PromptTemplate(
     template="Title: {Title}\nContent: {page_content}",
     input_variables=["page_content", "Title"],
 )
 prompt = PromptTemplate(
-    template=
-    """Write a personalised newsletter for a researcher on the most recent exciting developments in his field. The researcher describes his work as follows:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an excillerating newsletter.\n#ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
+    template="""Write a personalised newsletter for a researcher on the most recent exciting developments in his field. The researcher describes his work as follows:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an excillerating newsletter. Use Markdown format\n#ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
     input_variables=["context", "text"])
 
 # llm = FakeListLLM(responses=list(map(str, range(100))))
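Note: the hunk above defines a per-article `document_prompt` and a top-level `prompt` expecting `{context}` and `{text}`, but the chain that consumes them lies outside this diff. A minimal sketch of one way such prompts are commonly wired together with LangChain's legacy stuff-documents chain; `llm` and `relevant_docs` are placeholders, not names taken from the app:

```python
# Sketch only - the diff does not show the app's actual chain construction.
from langchain.chains import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

llm_chain = LLMChain(llm=llm, prompt=prompt)  # fills {context} and {text}
chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_prompt=document_prompt,    # renders each article as "Title: ...\nContent: ..."
    document_variable_name="text",      # concatenated articles are injected as {text}
)
# output = chain({"input_documents": relevant_docs, "context": user_query})
# output["output_text"] would then match the key used later in get_data.
```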
@@ -57,8 +50,12 @@ def process_document(doc: Document):
     metadata["Body"] = doc.page_content
     return Document(page_content=doc.metadata["Summary"], metadata=metadata)
 
-def get_data(user_query: str):
+def get_data(lookback_days: float, user_query: str):
     print("User query:", user_query)
+    max_date = date.today()
+    min_date = (max_date - timedelta(days=3)).strftime('%Y%m%d')
+    query = f"cat:hep-th AND submittedDate:[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
+    loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
     docs = loader.load()
     docs = [process_document(doc) for doc in docs]
     db = Chroma.from_documents(docs, embeddings)
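Note: as committed, `min_date` is already a string when the f-string calls `min_date.strftime(...)` a second time, which would raise `AttributeError`, and `timedelta(days=3)` ignores the new `lookback_days` argument. A minimal sketch of what the date handling presumably intends; `build_query` is a hypothetical helper, not part of the app:

```python
from datetime import date, timedelta

def build_query(lookback_days: float) -> str:
    # Keep both endpoints as date objects and format each exactly once.
    max_date = date.today()
    min_date = max_date - timedelta(days=int(lookback_days))
    return (
        f"cat:hep-th AND submittedDate:"
        f"[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
    )
```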
@@ -73,11 +70,31 @@ def get_data(user_query: str):
     print("LLM output:", output_text)
     return f"# Your AI curated newsletter\n{output['output_text']}\n\n\n\n## Used articles:\n\n{articles}"
 
-demo = gr.Interface(
-
-
-
-
-
-)
-
+# demo = gr.Interface(
+#     fn=get_data,
+#     inputs=[lookback_days, input_text]
+#     outputs=gr.Markdown(),
+#     title="Arxiv AI Curated Newsletter",
+#     description="Describe your field of research in a few words to get a newsletter-style summary of today's Arxiv articles.",
+# )
+
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # Arxiv AI Curated Newsletter
+
+
+        Get a newsletter-style summary of today's Arxiv articles personalised to your field of research.
+        """
+    )
+    lookback_days = gr.Number(2, label="Articles from this many days in the past will be searched through.", minimum=1, maximum=7)
+    input_text = gr.Textbox(placeholder="Describe your field of research in a few words")
+    gr.Examples(
+        [["Supersymmetric Conformal Field Theory"], ["Black hole information paradox"]],
+        input_text,
+    )
+    output = gr.Markdown()
+
+    input_text.change(fn=get_data, inputs=[lookback_days,input_text], outputs=output)
+
+demo.queue().launch()
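Note: in the new Blocks UI, `get_data` is bound to `input_text.change`, which fires each time the textbox value changes and can re-run the full arXiv download and LLM call while the user is still typing; changing `lookback_days` alone triggers nothing. A hedged alternative using an explicit button; `generate_btn` is a hypothetical name, not in the committed app:

```python
import gradio as gr

with gr.Blocks() as demo:
    lookback_days = gr.Number(2, label="Look back this many days", minimum=1, maximum=7)
    input_text = gr.Textbox(placeholder="Describe your field of research in a few words")
    generate_btn = gr.Button("Generate newsletter")  # explicit trigger instead of .change
    output = gr.Markdown()

    # get_data is the function defined earlier in app.py
    generate_btn.click(fn=get_data, inputs=[lookback_days, input_text], outputs=output)

demo.queue().launch()
```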