raannakasturi committed on
Commit
6018f49
·
1 Parent(s): cc25de1

Update title casing in app and optimize ID extraction logic in arvix

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. arvix.py +11 -4
app.py CHANGED
@@ -14,7 +14,7 @@ theme = gr.themes.Soft(
14
  ],
15
  )
16
 
17
- with gr.Blocks(theme=theme, title="fetch Research Paper IDS") as app:
18
  with gr.Row():
19
  with gr.Column():
20
  user_access_key = gr.Textbox(label="Access Key", placeholder="Enter your access key", type="password")
 
14
  ],
15
  )
16
 
17
+ with gr.Blocks(theme=theme, title="Fetch Research Paper IDS") as app:
18
  with gr.Row():
19
  with gr.Column():
20
  user_access_key = gr.Textbox(label="Access Key", placeholder="Enter your access key", type="password")
arvix.py CHANGED
@@ -73,7 +73,6 @@ def extract_data(category):
73
  tools.write_data_to_file(id, 'arxiv.txt')
74
  sanitized_data.append(id)
75
  random.shuffle(sanitized_data)
76
- print(len(sanitized_data))
77
  return sanitized_data
78
 
79
  def extract_arxiv_data():
@@ -102,13 +101,21 @@ def extract_arxiv_data():
102
  for category, subcategories in categories.items():
103
  category_data = {}
104
  all_ids = []
 
105
  for subcategory in subcategories:
106
  ids = extract_data(subcategory)
107
- for id in ids:
108
- all_ids.append(id)
 
 
 
 
 
 
 
 
109
  if len(all_ids) > 12:
110
  print(f"Found more than 12 papers for {category}. Randomly selecting 12 papers.")
111
- random.shuffle(all_ids)
112
  all_ids = all_ids[:12]
113
  category_data['count'] = len(all_ids)
114
  category_data['ids'] = all_ids
 
73
  tools.write_data_to_file(id, 'arxiv.txt')
74
  sanitized_data.append(id)
75
  random.shuffle(sanitized_data)
 
76
  return sanitized_data
77
 
78
  def extract_arxiv_data():
 
101
  for category, subcategories in categories.items():
102
  category_data = {}
103
  all_ids = []
104
+ temp_id_storage = []
105
  for subcategory in subcategories:
106
  ids = extract_data(subcategory)
107
+ if len(ids) == 3:
108
+ for id in ids:
109
+ temp_id_storage.append(id)
110
+ else:
111
+ for id in ids:
112
+ all_ids.append(id)
113
+ for temp_id in temp_id_storage:
114
+ all_ids.append(temp_id)
115
+ random.shuffle(all_ids)
116
+ print(len(all_ids))
117
  if len(all_ids) > 12:
118
  print(f"Found more than 12 papers for {category}. Randomly selecting 12 papers.")
 
119
  all_ids = all_ids[:12]
120
  category_data['count'] = len(all_ids)
121
  category_data['ids'] = all_ids