Spaces:
Running
Running
File size: 5,466 Bytes
5fd4781 1c57048 5fd4781 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
#' Build the author -> publication edge list for an OpenAlex title search.
#'
#' Fetches works whose title matches `keywords`, that were published between
#' `pub_start_date` and `pub_end_date` and have more than 50 citations, and
#' returns one directed edge per (author, work) pair. Edge endpoints are
#' integer node ids: unique authors are numbered first (in order of first
#' appearance), unique publications continue the numbering after them.
#'
#' @param keywords Passed to `oa_fetch()` as `title.search`.
#' @param pub_start_date Passed to `oa_fetch()` as `from_publication_date`.
#' @param pub_end_date Passed to `oa_fetch()` as `to_publication_date`.
#' @return A data frame with columns `Source` (author node id), `Target`
#'   (publication node id), `Type` (always "directed") and `Weight`
#'   (always 1). A zero-row data frame with the same columns is returned
#'   when the search yields no usable author/publication pairs.
authorPubEdges <- function(keywords, pub_start_date, pub_end_date) {
  # Query OpenAlex. Called exactly once so that node ids and edges are always
  # derived from the same result set (and the network is hit only once).
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    # Attaching the packages and setting the option are kept as in the
    # original; NOTE(review): other code may rely on these side effects.
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))
    # NOTE(review): the address looks like a redacted placeholder - confirm.
    options(openalexR.mailto = "[email protected]")
    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # Author names of a single work. The names are read from the second column
  # of the per-work author table (presumably the display name - TODO confirm
  # against the installed openalexR version). Entries that are not data
  # frames (e.g. a missing author table) contribute no authors instead of
  # silently shifting the author/publication alignment.
  work_authors <- function(author_entry) {
    if (!is.data.frame(author_entry) || ncol(author_entry) < 2L) {
      return(character(0))
    }
    as.character(author_entry[[2L]])
  }

  empty_edges <- data.frame(
    Source = integer(0),
    Target = integer(0),
    Type = character(0),
    Weight = numeric(0)
  )

  search_data <- search_engine(keywords, pub_start_date, pub_end_date)
  if (!is.data.frame(search_data) || nrow(search_data) == 0L) {
    return(empty_edges)
  }
  if (is.null(search_data[["author"]])) {
    stop("Search result has no `author` column.", call. = FALSE)
  }

  # One character vector of author names per work, in result order.
  authors_by_work <- lapply(search_data[["author"]], work_authors)
  authors_per_work <- lengths(authors_by_work)

  # Flatten to parallel vectors: the i-th author belongs to the i-th title.
  all_authors <- as.character(unlist(authors_by_work, use.names = FALSE))
  all_publications <- rep(search_data[["display_name"]], times = authors_per_work)
  if (length(all_authors) == 0L) {
    return(empty_edges)
  }

  # Node numbering: unique authors first, then unique publications.
  author_names <- unique(all_authors)
  publication_names <- unique(all_publications)

  # Authors and publications are looked up in their own name sets, so a
  # publication whose title equals an author name still gets its own id.
  data.frame(
    Source = match(all_authors, author_names),
    Target = length(author_names) + match(all_publications, publication_names),
    Type = "directed",
    Weight = 1.0
  )
}
|