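# Arxiv AI Curated Newsletter -- a Gradio Space that fetches recent arXiv
# papers, embeds their abstracts for retrieval, and has an LLM write a
# personalised newsletter for the user's described field of research.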
import gradio as gr
from datetime import date, timedelta
from langchain.document_loaders import ArxivLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
# from langchain.llms import FakeListLLM
from langchain.chains import LLMChain, StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain.schema import Document
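# Cap on how many arXiv results a single query may return.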
LOAD_MAX_DOCS = 100
# CHUNK_SIZE = 1000
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)
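# HuggingFaceEmbeddings defaults to sentence-transformers/all-mpnet-base-v2.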
embeddings = HuggingFaceEmbeddings()
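# How each retrieved article is rendered before being stuffed into the LLM prompt.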
document_prompt = PromptTemplate(
    template="Title: {Title}\nContent: {page_content}",
    input_variables=["page_content", "Title"],
)
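# Main prompt: the user's research description fills {context};
# the formatted articles fill {text}.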
prompt = PromptTemplate(
    template="""Write a personalised newsletter for a researcher on the most recent exciting developments in their field. The researcher describes their work as follows: "{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an exhilarating newsletter. Use Markdown format.\n# ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
    input_variables=["context", "text"],
)
# llm = FakeListLLM(responses=list(map(str, range(100))))
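# starchat-beta: an instruction-tuned chat model queried via the Hugging Face Hub inference API.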
REPO_ID = "HuggingFaceH4/starchat-beta"
llm = HuggingFaceHub(
    repo_id=REPO_ID,
    model_kwargs={
        "max_new_tokens": 300,
        "do_sample": True,
        "temperature": 0.8,
        "top_p": 0.9,
    },
)
llm_chain = LLMChain(llm=llm, prompt=prompt, verbose=True)
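# StuffDocumentsChain formats each document with document_prompt and "stuffs"
# them all into the {text} slot of the main prompt in a single LLM call.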
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_variable_name="text",
    document_prompt=document_prompt,
    verbose=True,
)
def process_document(doc: Document):
    """Embed articles by their abstract (Summary); keep the full text in metadata."""
    metadata = doc.metadata
    metadata["Body"] = doc.page_content
    return Document(page_content=doc.metadata["Summary"], metadata=metadata)
def get_data(lookback_days: float, user_query: str):
    print("User query:", user_query)
    # Restrict the arXiv search to hep-th papers submitted within the lookback window.
    max_date = date.today()
    min_date = max_date - timedelta(days=lookback_days)
    query = f"cat:hep-th AND submittedDate:[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
    loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
    docs = loader.load()
    docs = [process_document(doc) for doc in docs]
    # Index the abstracts and retrieve the articles most relevant to the user's query.
    db = Chroma.from_documents(docs, embeddings)
    retriever = db.as_retriever()
    relevant_docs = retriever.get_relevant_documents(user_query)
    print(relevant_docs[0].metadata)
    articles = ""
    for doc in relevant_docs:
        articles += f"**Title: {doc.metadata['Title']}**\n\nAbstract: {doc.metadata['Summary']}\n\n"
    output = stuff_chain({"input_documents": relevant_docs, "context": user_query})
    # Trim everything after the model's end-of-turn token.
    output_text = output["output_text"].split("<|end|>")[0]
    print("LLM output:", output_text)
    return f"# Your AI curated newsletter\n{output_text}\n\n\n\n## Used articles:\n\n{articles}"
# demo = gr.Interface(
#     fn=get_data,
#     inputs=[lookback_days, input_text],
#     outputs=gr.Markdown(),
#     title="Arxiv AI Curated Newsletter",
#     description="Describe your field of research in a few words to get a newsletter-style summary of today's Arxiv articles.",
# )
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Arxiv AI Curated Newsletter
        Get a newsletter-style summary of recent Arxiv articles, personalised to your field of research.
        """
    )
    lookback_days = gr.Number(2, label="How many days back to search for articles", minimum=1, maximum=7)
    input_text = gr.Textbox(placeholder="Describe your field of research in a few words")
    gr.Examples(
        [["Supersymmetric Conformal Field Theory"], ["Black hole information paradox"]],
        input_text,
    )
    output = gr.Markdown()
    input_text.change(fn=get_data, inputs=[lookback_days, input_text], outputs=output)

demo.queue().launch()