vincentmin committed on
Commit
d3db9a3
·
1 Parent(s): 0268ea7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -17
app.py CHANGED
@@ -12,24 +12,17 @@ from langchain.schema import Document
12
 
13
 
14
  LOAD_MAX_DOCS = 100
15
- min_date = (date.today() - timedelta(days=3)).strftime('%Y%m%d')
16
- max_date = date.today().strftime('%Y%m%d')
17
- query = f"cat:hep-th AND submittedDate:[{min_date} TO {max_date}]"
18
- loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
19
-
20
  # CHUNK_SIZE = 1000
21
  # text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)
22
 
23
  embeddings = HuggingFaceEmbeddings()
24
 
25
-
26
  document_prompt = PromptTemplate(
27
  template="Title: {Title}\nContent: {page_content}",
28
  input_variables=["page_content", "Title"],
29
  )
30
  prompt = PromptTemplate(
31
- template=
32
- """Write a personalised newsletter for a researcher on the most recent exciting developments in his field. The researcher describes his work as follows:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an excillerating newsletter.\n#ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
33
  input_variables=["context", "text"])
34
 
35
  # llm = FakeListLLM(responses=list(map(str, range(100))))
@@ -57,8 +50,12 @@ def process_document(doc: Document):
57
  metadata["Body"] = doc.page_content
58
  return Document(page_content=doc.metadata["Summary"], metadata=metadata)
59
 
60
- def get_data(user_query: str):
61
  print("User query:", user_query)
 
 
 
 
62
  docs = loader.load()
63
  docs = [process_document(doc) for doc in docs]
64
  db = Chroma.from_documents(docs, embeddings)
@@ -73,11 +70,31 @@ def get_data(user_query: str):
73
  print("LLM output:", output_text)
74
  return f"# Your AI curated newsletter\n{output['output_text']}\n\n\n\n## Used articles:\n\n{articles}"
75
 
76
- demo = gr.Interface(
77
- fn=get_data,
78
- inputs="text",
79
- outputs=gr.Markdown(),
80
- title="Document Filter",
81
- description="Enter a query to filter the list of documents."
82
- )
83
- demo.queue().launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
 
14
# Upper bound on the number of arXiv documents fetched per query.
LOAD_MAX_DOCS = 100

# Chunking is currently disabled; abstracts are embedded whole.
# CHUNK_SIZE = 1000
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)

# Sentence-embedding model used to index article summaries in Chroma.
embeddings = HuggingFaceEmbeddings()
19
 
 
20
# Renders one retrieved article (title + abstract) for inclusion in the
# main prompt below.
document_prompt = PromptTemplate(
    template="Title: {Title}\nContent: {page_content}",
    input_variables=["page_content", "Title"],
)

# Main newsletter prompt: {context} is the user's description of their
# research field, {text} is the concatenation of the formatted articles.
# (Fixes the "excillerating" misspelling in the original prompt string.)
prompt = PromptTemplate(
    template="""Write a personalised newsletter for a researcher on the most recent exciting developments in his field. The researcher describes his work as follows:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an exhilarating newsletter. Use Markdown format\n#ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
    input_variables=["context", "text"],
)
27
 
28
  # llm = FakeListLLM(responses=list(map(str, range(100))))
 
50
  metadata["Body"] = doc.page_content
51
  return Document(page_content=doc.metadata["Summary"], metadata=metadata)
52
 
53
+ def get_data(lookback_days: float, user_query: str):
54
  print("User query:", user_query)
55
+ max_date = date.today()
56
+ min_date = max_date - timedelta(days=lookback_days)
57
+ query = f"cat:hep-th AND submittedDate:[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
58
+ loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
59
  docs = loader.load()
60
  docs = [process_document(doc) for doc in docs]
61
  db = Chroma.from_documents(docs, embeddings)
 
70
  print("LLM output:", output_text)
71
  return f"# Your AI curated newsletter\n{output['output_text']}\n\n\n\n## Used articles:\n\n{articles}"
72
 
73
# Gradio UI: collect a lookback window and a free-text description of the
# user's research field, then render the generated newsletter as Markdown.
# (The dead commented-out gr.Interface variant was removed; it had drifted
# from the Blocks implementation and contained a missing-comma syntax error.)
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Arxiv AI Curated Newsletter

Get a newsletter-style summary of today's Arxiv articles personalised to your field of research.
"""
    )
    lookback_days = gr.Number(
        2,
        label="Articles from this many days in the past will be searched through.",
        minimum=1,
        maximum=7,
    )
    input_text = gr.Textbox(placeholder="Describe your field of research in a few words")
    gr.Examples(
        [["Supersymmetric Conformal Field Theory"], ["Black hole information paradox"]],
        input_text,
    )
    output = gr.Markdown()

    # Use `submit` (Enter key) rather than `change`: `change` fires on every
    # keystroke, re-running the full arXiv fetch + LLM pipeline per character.
    input_text.submit(fn=get_data, inputs=[lookback_days, input_text], outputs=output)

demo.queue().launch()