vincentmin committed on
Commit
5927a64
·
1 Parent(s): 1d3a54f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from datetime import date, timedelta
3
  from langchain.document_loaders import ArxivLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.vectorstores import Chroma
@@ -11,7 +11,7 @@ from langchain.schema import Document
11
 
12
 
13
  LOAD_MAX_DOCS = 100
14
-
15
  embeddings = HuggingFaceEmbeddings()
16
 
17
  document_prompt = PromptTemplate(
@@ -46,11 +46,22 @@ def process_document(doc: Document):
46
  metadata["Body"] = doc.page_content
47
  return Document(page_content=doc.metadata["Summary"], metadata=metadata)
48
 
 
 
 
 
 
 
 
 
 
 
49
  def get_data(category: str, lookback_days: float, user_query: str):
50
  print("User query:", user_query)
51
- max_date = date.today()
52
- min_date = (max_date - timedelta(days=lookback_days))
53
- query = f"cat:{category} AND submittedDate:[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
 
54
  loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
55
  docs = [process_document(doc) for doc in loader.load()]
56
  if len(docs) == 0:
 
1
  import gradio as gr
2
+ from datetime import datetime, timedelta
3
  from langchain.document_loaders import ArxivLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.vectorstores import Chroma
 
11
 
12
 
13
  LOAD_MAX_DOCS = 100
14
+ FORMAT = '%Y%m%d%H%M%S'
15
  embeddings = HuggingFaceEmbeddings()
16
 
17
  document_prompt = PromptTemplate(
 
46
  metadata["Body"] = doc.page_content
47
  return Document(page_content=doc.metadata["Summary"], metadata=metadata)
48
 
49
def get_date_range(lookback_days: float):
    """Return the ``(min_date, max_date)`` window used to filter submissions.

    Both bounds are naive ``datetime`` objects expressed in UTC so that they
    are measured against the same clock and can be formatted directly with
    ``strftime``.

    Args:
        lookback_days: Number of days to look back. One extra day is added so
            the window starts at 18:00 UTC on the day *before* the requested
            lookback period begins.

    Returns:
        A ``(min_date, max_date)`` tuple where ``max_date`` is the current UTC
        time and ``min_date`` is today's 18:00 UTC minus ``lookback_days + 1``
        days.
    """
    # Function-scope import: the module only imports ``datetime`` and
    # ``timedelta`` from the ``datetime`` package.
    from datetime import timezone

    # ``datetime`` here is the class, not the module, so it must be called
    # directly (``datetime.datetime.utcnow()`` raises AttributeError).
    # The tzinfo is stripped to keep returning naive datetimes to callers.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    # Today at 18:00:00 UTC.
    # NOTE(review): 18:00 UTC presumably mirrors the upstream daily cutoff -
    # confirm against the data source.
    today_1800_utc = now_utc.replace(hour=18, minute=0, second=0, microsecond=0)

    min_date = today_1800_utc - timedelta(days=lookback_days + 1)
    # Use the UTC "now" as the upper bound so both ends share one clock.
    max_date = now_utc
    return min_date, max_date
58
+
59
  def get_data(category: str, lookback_days: float, user_query: str):
60
  print("User query:", user_query)
61
+
62
+ min_date, max_date = get_date_range(lookback_days)
63
+ print(min_date, max_date)
64
+ query = f"cat:{category} AND submittedDate:[{min_date.strftime(FORMAT)} TO {max_date.strftime(FORMAT)}]"
65
  loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
66
  docs = [process_document(doc) for doc in loader.load()]
67
  if len(docs) == 0: