Commit
·
6018f49
1
Parent(s):
cc25de1
Update title casing in app.py and optimize ID extraction logic in arvix.py
Browse files
app.py
CHANGED
@@ -14,7 +14,7 @@ theme = gr.themes.Soft(
|
|
14 |
],
|
15 |
)
|
16 |
|
17 |
-
with gr.Blocks(theme=theme, title="
|
18 |
with gr.Row():
|
19 |
with gr.Column():
|
20 |
user_access_key = gr.Textbox(label="Access Key", placeholder="Enter your access key", type="password")
|
|
|
14 |
],
|
15 |
)
|
16 |
|
17 |
+
with gr.Blocks(theme=theme, title="Fetch Research Paper IDS") as app:
|
18 |
with gr.Row():
|
19 |
with gr.Column():
|
20 |
user_access_key = gr.Textbox(label="Access Key", placeholder="Enter your access key", type="password")
|
arvix.py
CHANGED
@@ -73,7 +73,6 @@ def extract_data(category):
|
|
73 |
tools.write_data_to_file(id, 'arxiv.txt')
|
74 |
sanitized_data.append(id)
|
75 |
random.shuffle(sanitized_data)
|
76 |
-
print(len(sanitized_data))
|
77 |
return sanitized_data
|
78 |
|
79 |
def extract_arxiv_data():
|
@@ -102,13 +101,21 @@ def extract_arxiv_data():
|
|
102 |
for category, subcategories in categories.items():
|
103 |
category_data = {}
|
104 |
all_ids = []
|
|
|
105 |
for subcategory in subcategories:
|
106 |
ids = extract_data(subcategory)
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
if len(all_ids) > 12:
|
110 |
print(f"Found more than 12 papers for {category}. Randomly selecting 12 papers.")
|
111 |
-
random.shuffle(all_ids)
|
112 |
all_ids = all_ids[:12]
|
113 |
category_data['count'] = len(all_ids)
|
114 |
category_data['ids'] = all_ids
|
|
|
73 |
tools.write_data_to_file(id, 'arxiv.txt')
|
74 |
sanitized_data.append(id)
|
75 |
random.shuffle(sanitized_data)
|
|
|
76 |
return sanitized_data
|
77 |
|
78 |
def extract_arxiv_data():
|
|
|
101 |
for category, subcategories in categories.items():
|
102 |
category_data = {}
|
103 |
all_ids = []
|
104 |
+
temp_id_storage = []
|
105 |
for subcategory in subcategories:
|
106 |
ids = extract_data(subcategory)
|
107 |
+
if len(ids) == 3:
|
108 |
+
for id in ids:
|
109 |
+
temp_id_storage.append(id)
|
110 |
+
else:
|
111 |
+
for id in ids:
|
112 |
+
all_ids.append(id)
|
113 |
+
for temp_id in temp_id_storage:
|
114 |
+
all_ids.append(temp_id)
|
115 |
+
random.shuffle(all_ids)
|
116 |
+
print(len(all_ids))
|
117 |
if len(all_ids) > 12:
|
118 |
print(f"Found more than 12 papers for {category}. Randomly selecting 12 papers.")
|
|
|
119 |
all_ids = all_ids[:12]
|
120 |
category_data['count'] = len(all_ids)
|
121 |
category_data['ids'] = all_ids
|