Spaces:
Sleeping
Sleeping
vincentmin
committed on
Commit
·
5927a64
1
Parent(s):
1d3a54f
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from datetime import
|
3 |
from langchain.document_loaders import ArxivLoader
|
4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
from langchain.vectorstores import Chroma
|
@@ -11,7 +11,7 @@ from langchain.schema import Document
|
|
11 |
|
12 |
|
13 |
LOAD_MAX_DOCS = 100
|
14 |
-
|
15 |
embeddings = HuggingFaceEmbeddings()
|
16 |
|
17 |
document_prompt = PromptTemplate(
|
@@ -46,11 +46,22 @@ def process_document(doc: Document):
|
|
46 |
metadata["Body"] = doc.page_content
|
47 |
return Document(page_content=doc.metadata["Summary"], metadata=metadata)
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
def get_data(category: str, lookback_days: float, user_query: str):
|
50 |
print("User query:", user_query)
|
51 |
-
|
52 |
-
min_date
|
53 |
-
|
|
|
54 |
loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
|
55 |
docs = [process_document(doc) for doc in loader.load()]
|
56 |
if len(docs) == 0:
|
|
|
1 |
import gradio as gr
|
2 |
+
from datetime import datetime, timedelta
|
3 |
from langchain.document_loaders import ArxivLoader
|
4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
from langchain.vectorstores import Chroma
|
|
|
11 |
|
12 |
|
13 |
LOAD_MAX_DOCS = 100
|
14 |
+
FORMAT = '%Y%m%d%H%M%S'
|
15 |
embeddings = HuggingFaceEmbeddings()
|
16 |
|
17 |
document_prompt = PromptTemplate(
|
|
|
46 |
metadata["Body"] = doc.page_content
|
47 |
return Document(page_content=doc.metadata["Summary"], metadata=metadata)
|
48 |
|
49 |
+
def get_date_range(lookback_days: float):
    """Return the ``(min_date, max_date)`` window used to filter submissions.

    Both values are naive ``datetime`` objects expressed in UTC, so they can
    be formatted directly with ``strftime``.

    Args:
        lookback_days: Number of days to look back. Fractional values are
            allowed; one extra day is always added to the window.

    Returns:
        A ``(min_date, max_date)`` tuple. ``max_date`` is the current UTC
        time; ``min_date`` is today at 18:00 UTC minus ``lookback_days + 1``
        days.
    """
    # Local import: the file-level import only brings in `datetime` and
    # `timedelta`.
    from datetime import timezone

    # Read the clock once, in UTC, so both ends of the range share the same
    # reference. The tzinfo is dropped to keep returning naive datetimes.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    max_date = now_utc

    # Anchor the lower bound at today 18:00 UTC.
    # NOTE(review): presumably chosen to line up with arXiv's daily
    # submission cutoff -- confirm.
    today_1800_utc = datetime(now_utc.year, now_utc.month, now_utc.day, 18, 0, 0)
    min_date = today_1800_utc - timedelta(days=lookback_days + 1)
    return min_date, max_date
|
58 |
+
|
59 |
def get_data(category: str, lookback_days: float, user_query: str):
|
60 |
print("User query:", user_query)
|
61 |
+
|
62 |
+
min_date, max_date = get_date_range(lookback_days)
|
63 |
+
print(min_date, max_date)
|
64 |
+
query = f"cat:{category} AND submittedDate:[{min_date.strftime(FORMAT)} TO {max_date.strftime(FORMAT)}]"
|
65 |
loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
|
66 |
docs = [process_document(doc) for doc in loader.load()]
|
67 |
if len(docs) == 0:
|