vincentmin committed
Commit · d3db9a3
Parent(s): 0268ea7
Update app.py
app.py
CHANGED
@@ -12,24 +12,17 @@ from langchain.schema import Document
 
 
 LOAD_MAX_DOCS = 100
-min_date = (date.today() - timedelta(days=3)).strftime('%Y%m%d')
-max_date = date.today().strftime('%Y%m%d')
-query = f"cat:hep-th AND submittedDate:[{min_date} TO {max_date}]"
-loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
-
 # CHUNK_SIZE = 1000
 # text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)
 
 embeddings = HuggingFaceEmbeddings()
 
-
 document_prompt = PromptTemplate(
     template="Title: {Title}\nContent: {page_content}",
     input_variables=["page_content", "Title"],
 )
 prompt = PromptTemplate(
-    template=
-    """Write a personalised newsletter for a researcher on the most recent exciting developments in his field. The researcher describes his work as follows:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an excillerating newsletter.\n#ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
+    template="""Write a personalised newsletter for a researcher on the most recent exciting developments in his field. The researcher describes his work as follows:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an excillerating newsletter. Use Markdown format\n#ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
     input_variables=["context", "text"])
 
 # llm = FakeListLLM(responses=list(map(str, range(100))))
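Note: the hunk above defines a per-article `document_prompt` and a top-level `prompt` expecting `{context}` and `{text}`, but the chain that consumes them lies outside this diff. A minimal sketch of one way such prompts are commonly wired together with LangChain's legacy stuff-documents chain; `llm` and `relevant_docs` are placeholders, not names taken from the app:

```python
# Sketch only - the diff does not show the app's actual chain construction.
from langchain.chains import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

llm_chain = LLMChain(llm=llm, prompt=prompt)  # fills {context} and {text}
chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_prompt=document_prompt,    # renders each article as "Title: ...\nContent: ..."
    document_variable_name="text",      # concatenated articles are injected as {text}
)
# output = chain({"input_documents": relevant_docs, "context": user_query})
# output["output_text"] would then match the key used later in get_data.
```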
@@ -57,8 +50,12 @@ def process_document(doc: Document):
     metadata["Body"] = doc.page_content
     return Document(page_content=doc.metadata["Summary"], metadata=metadata)
 
-def get_data(user_query: str):
+def get_data(lookback_days: float, user_query: str):
     print("User query:", user_query)
+    max_date = date.today()
+    min_date = (max_date - timedelta(days=3)).strftime('%Y%m%d')
+    query = f"cat:hep-th AND submittedDate:[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
+    loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
     docs = loader.load()
     docs = [process_document(doc) for doc in docs]
     db = Chroma.from_documents(docs, embeddings)
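Note: as committed, `min_date` is already a string when the f-string calls `min_date.strftime(...)` a second time, which would raise `AttributeError`, and `timedelta(days=3)` ignores the new `lookback_days` argument. A minimal sketch of what the date handling presumably intends; `build_query` is a hypothetical helper, not part of the app:

```python
from datetime import date, timedelta

def build_query(lookback_days: float) -> str:
    # Keep both endpoints as date objects and format each exactly once.
    max_date = date.today()
    min_date = max_date - timedelta(days=int(lookback_days))
    return (
        f"cat:hep-th AND submittedDate:"
        f"[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
    )
```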
@@ -73,11 +70,31 @@ def get_data(user_query: str):
     print("LLM output:", output_text)
     return f"# Your AI curated newsletter\n{output['output_text']}\n\n\n\n## Used articles:\n\n{articles}"
 
-demo = gr.Interface(
-
-
-
-
-
-)
-
+# demo = gr.Interface(
+#     fn=get_data,
+#     inputs=[lookback_days, input_text]
+#     outputs=gr.Markdown(),
+#     title="Arxiv AI Curated Newsletter",
+#     description="Describe your field of research in a few words to get a newsletter-style summary of today's Arxiv articles.",
+# )
+
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # Arxiv AI Curated Newsletter
+
+
+        Get a newsletter-style summary of today's Arxiv articles personalised to your field of research.
+        """
+    )
+    lookback_days = gr.Number(2, label="Articles from this many days in the past will be searched through.", minimum=1, maximum=7)
+    input_text = gr.Textbox(placeholder="Describe your field of research in a few words")
+    gr.Examples(
+        [["Supersymmetric Conformal Field Theory"], ["Black hole information paradox"]],
+        input_text,
+    )
+    output = gr.Markdown()
+
+    input_text.change(fn=get_data, inputs=[lookback_days,input_text], outputs=output)
+
+demo.queue().launch()
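Note: in the new Blocks UI, `get_data` is bound to `input_text.change`, which fires each time the textbox value changes and can re-run the full arXiv download and LLM call while the user is still typing; changing `lookback_days` alone triggers nothing. A hedged alternative using an explicit button; `generate_btn` is a hypothetical name, not in the committed app:

```python
import gradio as gr

with gr.Blocks() as demo:
    lookback_days = gr.Number(2, label="Look back this many days", minimum=1, maximum=7)
    input_text = gr.Textbox(placeholder="Describe your field of research in a few words")
    generate_btn = gr.Button("Generate newsletter")  # explicit trigger instead of .change
    output = gr.Markdown()

    # get_data is the function defined earlier in app.py
    generate_btn.click(fn=get_data, inputs=[lookback_days, input_text], outputs=output)

demo.queue().launch()
```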