hunkim committed on
Commit
4ffd5f1
·
1 Parent(s): 44190c7

Create crawl_index.py

Browse files
Files changed (1) hide show
  1. crawl_index.py +19 -0
crawl_index.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Crawl one Google Scholar author's publications and index them.

The script looks up a single author by Google Scholar ID through the
``scholarly`` package, walks that author's publication list, and hands
each publication's ``bib`` record to an ``ESGPT`` instance for indexing.
It runs top to bottom when executed; there are no functions to call.
"""

from scholarly import scholarly
import json
from es_gpt import ESGPT

# Google Scholar ID of the author whose publications are crawled.
AUTHOR_ID = "JE_m2UgAAAAJ"

# Name of the index the ESGPT instance is created for.
INDEX_NAME = "papers"

# Create an instance of the ESGPT class
esgpt = ESGPT(index_name=INDEX_NAME)

# Look the author up by ID, then ask scholarly to populate the
# 'publications' section so the list can be iterated below.
author = scholarly.search_author_id(AUTHOR_ID)
author = scholarly.fill(author, sections=['publications'])

# Index each paper in Elasticsearch
for publication in author['publications']:
    # Re-fill the individual publication before reading its fields.
    # NOTE(review): presumably an empty `sections` list means "fill
    # everything" -- confirm against the scholarly documentation.
    publication = scholarly.fill(publication, sections=[])
    print(publication)

    bib_record = publication['bib']
    # Named `pub_id` rather than `id` so the `id()` builtin is not shadowed.
    pub_id = publication['author_pub_id']

    # Index the paper, keyed by its author-scoped publication ID.
    esgpt.index(doc_id=pub_id, doc=bib_record)