# OpenAlex4Gephi / OpenAlexEdges.R
# Ifeanyi's picture
# Update OpenAlexEdges.R
# 1c57048 verified
#' Build a Gephi edge table linking authors to their publications
#'
#' Queries OpenAlex for works whose title matches `keywords`, then emits one
#' edge per (author, publication) pair. Edge endpoints are integer node ids:
#' unique author names are numbered first (in order of first appearance),
#' followed by unique publication titles.
#'
#' @param keywords Passed to `oa_fetch()` as `title.search`.
#' @param pub_start_date Passed to `oa_fetch()` as `from_publication_date`
#'   (presumably a "YYYY-MM-DD" string; confirm against openalexR docs).
#' @param pub_end_date Passed to `oa_fetch()` as `to_publication_date`.
#' @return A data frame with columns `Source` (author node id), `Target`
#'   (publication node id), `Type` (always "directed") and `Weight`
#'   (always 1). A zero-row data frame with the same columns is returned
#'   when the search yields no author/publication pairs.
authorPubEdges <- function(keywords, pub_start_date, pub_end_date) {
  # Query OpenAlex for works cited more than 50 times, most cited first.
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))
    options(openalexR.mailto = "[email protected]")
    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # Zero-row result with the same columns as a populated edge table.
  empty_edges <- function() {
    data.frame(
      Source = integer(0),
      Target = integer(0),
      Type = character(0),
      Weight = numeric(0)
    )
  }

  # Extract the author names of one work. The names are read from the second
  # column of the per-work author data frame (same positional access as
  # before). Anything that is not a data frame (e.g. a missing-author
  # placeholder) counts as "no authors" so that author and publication
  # vectors can never drift out of alignment.
  author_names <- function(author_info) {
    if (!is.data.frame(author_info) || ncol(author_info) < 2L) {
      return(character(0))
    }
    as.character(author_info[[2L]])
  }

  # Fetch once; the same result drives both node numbering and edges.
  search_data <- search_engine(keywords, pub_start_date, pub_end_date)
  if (is.null(search_data) || NROW(search_data) == 0L) {
    return(empty_edges())
  }

  missing_cols <- setdiff(c("author", "display_name"), names(search_data))
  if (length(missing_cols) > 0L) {
    stop(
      "OpenAlex result is missing expected column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # One character vector of author names per work.
  authors_per_work <- lapply(search_data$author, author_names)
  author_counts <- lengths(authors_per_work)

  all_authors <- unlist(authors_per_work, use.names = FALSE)
  if (length(all_authors) == 0L) {
    return(empty_edges())
  }

  # Repeat each title once per author so both vectors line up row by row.
  all_publications <- rep(search_data$display_name, times = author_counts)

  # Node ids: unique authors first, then unique publications.
  unique_authors <- unique(all_authors)
  unique_publications <- unique(all_publications)

  # Authors and publications are matched against their own id ranges, so a
  # publication title that happens to equal an author name still resolves to
  # the publication node. Missing names stay NA instead of matching a node.
  source_ids <- match(all_authors, unique_authors, incomparables = NA)
  target_ids <- length(unique_authors) +
    match(all_publications, unique_publications, incomparables = NA)

  data.frame(
    Source = source_ids,
    Target = target_ids,
    Type = "directed",
    Weight = 1.0
  )
}