# Spaces:
# Running
# Running
#' Build a directed author -> publication edge list from an OpenAlex search
#'
#' Fetches works whose title matches `keywords`, restricted to the given
#' publication window and to works with more than 50 citations, then pairs
#' every listed author with the display name of the work. Author and
#' publication names are replaced by integer node ids, numbered in order of
#' first appearance: unique authors first, then unique publications.
#'
#' @param keywords Search text passed to `oa_fetch()` as `title.search`.
#' @param pub_start_date Lower bound passed as `from_publication_date`
#'   (presumably "YYYY-MM-DD"; confirm against the OpenAlex API).
#' @param pub_end_date Upper bound passed as `to_publication_date`
#'   (same format assumption as `pub_start_date`).
#' @return A data frame with one row per author/publication pair and the
#'   columns `Source` (author node id), `Target` (publication node id),
#'   `Type` (always "directed") and `Weight` (always 1). A zero-row data
#'   frame with the same columns is returned when the search yields no works
#'   or when no work lists any author.
authorPubEdges <- function(keywords, pub_start_date, pub_end_date) {
  # Attaching both packages and setting the mailto option are session-wide
  # side effects; they are kept because surrounding code may rely on them.
  suppressPackageStartupMessages(library(openalexR))
  suppressPackageStartupMessages(library(tidyverse))
  options(openalexR.mailto = "[email protected]")

  # Query OpenAlex once and derive both the node ids and the edges from the
  # same result, so the two can never disagree.
  search_data <- oa_fetch(
    entity = "works",
    title.search = keywords,
    cited_by_count = ">50",
    from_publication_date = pub_start_date,
    to_publication_date = pub_end_date,
    options = list(sort = "cited_by_count:desc"),
    verbose = FALSE
  )

  empty_edges <- data.frame(
    Source = integer(0),
    Target = integer(0),
    Type = character(0),
    Weight = numeric(0)
  )

  # NROW() is 0 for both NULL and a zero-row data frame.
  if (NROW(search_data) == 0L) {
    return(empty_edges)
  }

  # NOTE(review): column names of the works table appear to differ between
  # openalexR releases -- verify against the installed version.
  missing_cols <- setdiff(c("author", "display_name"), names(search_data))
  if (length(missing_cols) > 0L) {
    stop(
      "OpenAlex result is missing expected column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Author names of each work, taken from the second column of the per-work
  # author table (assumed to hold the display name -- TODO confirm). Works
  # whose author entry is not a usable data frame contribute no authors, so
  # the per-work counts below always stay aligned with the rows of
  # `search_data`.
  author_names <- lapply(search_data[["author"]], function(work_authors) {
    if (!is.data.frame(work_authors) || ncol(work_authors) < 2L) {
      return(character(0))
    }
    work_authors[[2L]]
  })

  authors_per_work <- lengths(author_names)
  all_authors <- unlist(author_names, use.names = FALSE)

  # Repeat each title once per author so both vectors line up row by row.
  all_publications <- rep(
    search_data[["display_name"]],
    times = authors_per_work
  )

  if (length(all_authors) == 0L) {
    return(empty_edges)
  }

  # Node order: unique authors, then unique publications. A node's id is its
  # position in this vector, and match() returns the first matching position.
  node_names <- c(unique(all_authors), unique(all_publications))

  data.frame(
    Source = match(all_authors, node_names),
    Target = match(all_publications, node_names),
    Type = "directed",
    Weight = 1.0
  )
}