Ifeanyi committed on
Commit
5fd4781
·
1 Parent(s): 4457e6d

Upload 3 files

Browse files
Files changed (3) hide show
  1. OpenAlexEdges.R +101 -0
  2. OpenAlexEdgesDisp.R +85 -0
  3. OpenAlexNodes.R +85 -0
OpenAlexEdges.R ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Build the author -> publication edge table, expressed as node ids
#'
#' Searches OpenAlex for works whose title matches `keywords`, that have more
#' than 50 citations and that were published between `pub_start_date` and
#' `pub_end_date`. Every (author, publication) pair becomes one edge, and the
#' author name / publication title are then replaced by the matching `Id`
#' from the node table returned by `authorPubNodes()`.
#'
#' @param keywords Search terms, passed to `oa_fetch()` as `title.search`.
#' @param pub_start_date Passed to `oa_fetch()` as `from_publication_date`.
#' @param pub_end_date Passed to `oa_fetch()` as `to_publication_date`.
#' @return A data frame with the columns `Source` (author node id), `Target`
#'   (publication node id), `Type` (always "directed") and `Weight`
#'   (always 1). It has zero rows when the search returns nothing.
authorPubEdges <- function(keywords, pub_start_date, pub_end_date) {

  # Query OpenAlex for highly cited works, most cited first.
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))

    # NOTE(review): this address looks like an obfuscation placeholder
    # rather than a real e-mail address -- confirm before deploying.
    options(openalexR.mailto = "[email protected]")

    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # Build an edge table; rep() keeps the constant columns valid for 0 rows.
  make_edges <- function(source, target) {
    data.frame(
      Source = source,
      Target = target,
      Type = rep("directed", length(source)),
      Weight = rep(1.0, length(source))
    )
  }

  # Make authorPubNodes() available. The file was uploaded as
  # "OpenAlexNodes.R"; the previously hard-coded "openAlexNodes.R" only
  # resolves on case-insensitive file systems, so both spellings are tried.
  if (!exists("authorPubNodes", mode = "function")) {
    candidates <- c("OpenAlexNodes.R", "openAlexNodes.R")
    available <- candidates[file.exists(candidates)]
    if (length(available) == 0L) {
      stop(
        "Cannot find 'OpenAlexNodes.R' in the working directory.",
        call. = FALSE
      )
    }
    source(available[1L])
  }

  search_data <- search_engine(keywords, pub_start_date, pub_end_date)

  # Nothing found: return an empty, correctly typed edge table.
  if (is.null(search_data) || nrow(search_data) == 0L) {
    return(make_edges(integer(0), integer(0)))
  }

  # authorPubNodes() is called with the same arguments and runs its own
  # OpenAlex query, so node ids are matched to the edges by value below.
  author_nodes <- authorPubNodes(keywords, pub_start_date, pub_end_date)

  # One character vector of author names per work. The names are taken from
  # the second column of each per-work author table; works without an author
  # table contribute no edges instead of misaligning authors and titles.
  author_names <- lapply(search_data$author, function(authors) {
    if (is.data.frame(authors) && ncol(authors) >= 2L) {
      as.character(authors[[2L]])
    } else {
      character(0)
    }
  })

  all_authors <- as.character(unlist(author_names, use.names = FALSE))

  # Repeat every title once per author so both vectors line up row by row.
  all_publications <- rep(
    search_data$display_name,
    times = lengths(author_names)
  )

  # Translate names / titles into node ids. The node column is "Nodes";
  # `[[` is used so the lookup does not depend on `$` partial matching.
  node_values <- author_nodes[["Nodes"]]
  source_ids <- author_nodes[["Id"]][match(all_authors, node_values)]
  target_ids <- author_nodes[["Id"]][match(all_publications, node_values)]

  unmatched <- is.na(source_ids) | is.na(target_ids)
  if (any(unmatched)) {
    warning(
      sum(unmatched), " edge(s) could not be matched to a node Id.",
      call. = FALSE
    )
  }

  make_edges(source_ids, target_ids)
}
OpenAlexEdgesDisp.R ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Build the author -> publication edge table with display names
#'
#' Searches OpenAlex for works whose title matches `keywords`, that have more
#' than 50 citations and that were published between `pub_start_date` and
#' `pub_end_date`. Every (author, publication) pair becomes one edge that
#' keeps the author name and the publication title as they were returned.
#'
#' @param keywords Search terms, passed to `oa_fetch()` as `title.search`.
#' @param pub_start_date Passed to `oa_fetch()` as `from_publication_date`.
#' @param pub_end_date Passed to `oa_fetch()` as `to_publication_date`.
#' @return A data frame with the columns `Source` (author name), `Target`
#'   (publication title), `Type` (always "directed") and `Weight`
#'   (always 1). It has zero rows when the search returns nothing.
authorPubEdgesDisp <- function(keywords, pub_start_date, pub_end_date) {

  # Query OpenAlex for highly cited works, most cited first.
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))

    # NOTE(review): this address looks like an obfuscation placeholder
    # rather than a real e-mail address -- confirm before deploying.
    options(openalexR.mailto = "[email protected]")

    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # Build an edge table; rep() keeps the constant columns valid for 0 rows.
  make_edges <- function(source, target) {
    data.frame(
      Source = source,
      Target = target,
      Type = rep("directed", length(source)),
      Weight = rep(1.0, length(source))
    )
  }

  search_data <- search_engine(keywords, pub_start_date, pub_end_date)

  # Nothing found: return an empty, correctly typed edge table.
  if (is.null(search_data) || nrow(search_data) == 0L) {
    return(make_edges(character(0), character(0)))
  }

  # One character vector of author names per work. The names are taken from
  # the second column of each per-work author table; works without an author
  # table contribute no edges instead of misaligning authors and titles.
  author_names <- lapply(search_data$author, function(authors) {
    if (is.data.frame(authors) && ncol(authors) >= 2L) {
      as.character(authors[[2L]])
    } else {
      character(0)
    }
  })

  all_authors <- as.character(unlist(author_names, use.names = FALSE))

  # Repeat every title once per author so both vectors line up row by row.
  all_publications <- rep(
    search_data$display_name,
    times = lengths(author_names)
  )

  make_edges(all_authors, all_publications)
}
80
+
81
+ # author_edges <- authorPubEdgesDisp(keywords = c("information","analytics","management"),
82
+ # pub_start_date = "2022-01-01",
83
+ # pub_end_date = "2023-01-31")
84
+ #
85
+ # author_edges |> view()
OpenAlexNodes.R ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Build the node table of an author-publication network
#'
#' Searches OpenAlex for works whose title matches `keywords`, that have more
#' than 50 citations and that were published between `pub_start_date` and
#' `pub_end_date`. The distinct author names come first, followed by the
#' distinct publication titles, and the rows are numbered consecutively.
#'
#' @param keywords Search terms, passed to `oa_fetch()` as `title.search`.
#' @param pub_start_date Passed to `oa_fetch()` as `from_publication_date`.
#' @param pub_end_date Passed to `oa_fetch()` as `to_publication_date`.
#' @return A data frame with the columns `Id` (1, 2, ...), `Nodes` and
#'   `Label` (both hold the author name or publication title). It has zero
#'   rows when the search returns nothing.
authorPubNodes <- function(keywords, pub_start_date, pub_end_date) {

  # Query OpenAlex for highly cited works, most cited first.
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))

    # NOTE(review): this address looks like an obfuscation placeholder
    # rather than a real e-mail address -- confirm before deploying.
    options(openalexR.mailto = "[email protected]")

    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  search_data <- search_engine(keywords, pub_start_date, pub_end_date)

  # Nothing found: return an empty, correctly typed node table.
  if (is.null(search_data) || nrow(search_data) == 0L) {
    return(data.frame(
      Id = integer(0),
      Nodes = character(0),
      Label = character(0)
    ))
  }

  # One character vector of author names per work. The names are taken from
  # the second column of each per-work author table; works without an author
  # table contribute neither author nor publication nodes.
  author_names <- lapply(search_data$author, function(authors) {
    if (is.data.frame(authors) && ncol(authors) >= 2L) {
      as.character(authors[[2L]])
    } else {
      character(0)
    }
  })

  all_authors <- as.character(unlist(author_names, use.names = FALSE))

  # Repeat every title once per author so both vectors line up row by row.
  all_publications <- rep(
    search_data$display_name,
    times = lengths(author_names)
  )

  # Distinct authors first, then distinct publications: the same rows that
  # stacking both columns and dropping duplicate rows would produce.
  node_values <- c(unique(all_authors), unique(all_publications))

  data.frame(
    Id = seq_along(node_values),
    Nodes = node_values,
    Label = node_values
  )
}