# Print the introduction of this section through the cowsay package.
cowsay::say("After researching the articles and references by making graphs to
better visualize the structure of the research. We want to focus
here on the authors, trying to understand how communities evolve over time.")
Systematic literature review
A focus on authors, articles, references with networks
This was the original document metadata, which I am overriding above just to get things working on this documentation site.
---
title: "Systematic literature review"
bibliography: references.bib
title-block-banner: true
subtitle: "A focus on authors, articles, references with networks"
author:
  - name: Olivier Caron
    email: olivier.caron@dauphine.psl.eu
    affiliations:
      name: "Paris Dauphine - PSL"
      city: Paris
      state: France
  - name: Christophe Benavent
    email: christophe.benavent@dauphine.psl.eu
    affiliations:
      name: "Paris Dauphine - PSL"
      city: Paris
      state: France
date: "last-modified"
toc: true
number-sections: true
number-depth: 10
format:
  html:
    theme:
      light: yeti
      #dark: darkly
    code-fold: true
    code-summary: "Display code"
    code-tools: true #enables to display/hide all blocks of code
    code-copy: true #enables to copy code
    grid:
      body-width: 1000px
      margin-width: 100px
    toc: true
    toc-location: left
execute:
  echo: true
  warning: false
  message: false
editor: visual
fig-align: "center"
highlight-style: ayu
css: styles.css
reference-location: margin
---
1 Purpose
2 Libraries and preparing data
2.3 Correct the duplicate names
Let’s correct that by using one property of the `distinct()` function: the `.keep_all = TRUE` parameter. It keeps the first occurrence of each group, which is the first row encountered for each unique combination of `authid` and `authname`. This is faster than manually changing the name of each author.
# Merge list_articles with result on the authid column.
# Columns present in both tables get the .x/.y suffixes, so the name coming
# from result is available as authname.y.
merged_df <- left_join(list_articles, result, by = "authid")

# Replace authname values in list_articles with those from result;
# rows without a match in result (NA in authname.y) keep their current name.
list_articles$authname <- ifelse(
  !is.na(merged_df$authname.y),
  merged_df$authname.y,
  list_articles$authname
)

# Keep only marketing articles and filter "Erratum" type of publications (=correction)
list_articles <- list_articles %>%
  filter(marketing == 1) %>%
  filter(subtypeDescription != "Erratum")

cat("There are", n_distinct(list_articles$entry_number), "articles and", n_distinct(list_articles$authname), "authors overall in the data.")

# Write the updated dataframe to a CSV file
write_csv2(list_articles, "nlp_full_data_final_unique_author_names.csv")
It is now done. We can check again whether there is more than one unique `authname` per `authid`.
2.4 Verification of duplicate names
# For each author id, keep the id/name/article columns and record in n the
# number of rows attached to that id.
test <- list_articles %>%
  group_by(authid) %>%
  select(authid, authname, entry_number) %>%
  mutate(n = n())

# Keep one row per author id that is still associated with more than one
# distinct name, with entry_number moved to the first column.
result <- test %>%
  group_by(authid) %>%
  filter(n_distinct(authname) > 1) %>%
  distinct(authid, .keep_all = TRUE) %>%
  relocate(entry_number)

# Show the remaining conflicts as an interactive table.
result %>% reactable()
It’s alright, we can now continue on constructing the data frames for the networks.
4 Graph density of references
# Create a dataframe with the density of each graph
#density_df_references = pd.DataFrame({
#'period': ['before-2013', '2013-2017', '2018-2021', '2022-2023', 'overall'],
#'density': [
#nx.density(G_before_2013_references),
#nx.density(G_2013_2017_references),
#nx.density(G_2018_2021_references),
#nx.density(G_2022_2023_references),
#nx.density(G_overall_references)
#],
#'average_degree': [
#average_degree(G_before_2013_references),
#average_degree(G_2013_2017_references),
#average_degree(G_2018_2021_references),
#average_degree(G_2022_2023_references),
#average_degree(G_overall_references)
#],
#'linear_density': [
#linear_density(G_before_2013_references),
#linear_density(G_2013_2017_references),
#linear_density(G_2018_2021_references),
#linear_density(G_2022_2023_references),
#linear_density(G_overall_references)
#]
#})
```{r}
#| label: citations-graph-density-comparison
#| fig.cap: Comparison of network densities and average degree of nodes over time
#| column: body-outset

# Compare the collaboration network (py$density_df) with the references
# network (py$density_df_references) on three metrics: density, linear
# density and average degree. One line plot is built per metric and the
# three plots are stacked into a single figure.

# Periods displayed on the x axis, in chronological order.
density_periods <- c("before-2013", "2013-2017", "2018-2021", "2022-2023")

# The "overall" row of the references table is not a period: drop it once
# here instead of repeating the filter in every layer.
references_density <- py$density_df_references %>%
  filter(period != "overall")

# NOTE(review): `text` is not a geom_line() aesthetic; it is presumably kept
# for plotly tooltips elsewhere in the document - confirm before removing.

# Density plot
density_plot <- ggplot() +
  geom_line(
    data = py$density_df,
    aes(
      x = period, y = density, colour = "Collaboration Density", group = 1,
      text = paste("Period:", period, "<br>Density:", density)
    ),
    linewidth = 1
  ) +
  geom_line(
    data = references_density,
    aes(
      x = period, y = density, colour = "References Density", group = 1,
      text = paste("Period:", period, "<br>Density:", density)
    ),
    linewidth = 1
  ) +
  scale_y_continuous(name = "Graphs Density") +
  scale_x_discrete(limits = density_periods) +
  xlab("Period") +
  ggtitle("Comparison of Network Density Over Time") +
  theme_minimal()

# Linear density plot (m/n)
linear_density_plot <- ggplot() +
  geom_line(
    data = py$density_df,
    aes(
      x = period, y = linear_density,
      colour = "Collaboration Linear Density", group = 1,
      text = paste("Period:", period, "<br>Linear Density:", linear_density)
    ),
    linewidth = 1
  ) +
  geom_line(
    data = references_density,
    aes(
      x = period, y = linear_density,
      colour = "References Linear Density", group = 1,
      text = paste("Period:", period, "<br>Linear Density:", linear_density)
    ),
    linewidth = 1
  ) +
  scale_y_continuous(name = "Graphs Linear Density") +
  scale_x_discrete(limits = density_periods) +
  xlab("Period") +
  ggtitle("Comparison of Network Linear Density Over Time") +
  theme_minimal()

# Average degree plot
avg_degree_plot <- ggplot() +
  geom_line(
    data = py$density_df,
    aes(
      x = period, y = average_degree,
      colour = "Collaboration Average Degree", group = 1,
      text = paste("Period:", period, "<br>Average Degree:", average_degree)
    ),
    linewidth = 1
  ) +
  geom_line(
    data = references_density,
    aes(
      x = period, y = average_degree,
      colour = "References Average Degree", group = 1,
      text = paste("Period:", period, "<br>Average Degree:", average_degree)
    ),
    linewidth = 1
  ) +
  scale_y_continuous(name = "Nodes Average Degree") +
  scale_x_discrete(limits = density_periods) +
  xlab("Period") +
  ggtitle("Comparison of Network Average Degree Over Time") +
  theme_minimal()

# Stack the three plots vertically and display the result.
combined_density_plot <- density_plot / linear_density_plot / avg_degree_plot
combined_density_plot

# Save the combined figure explicitly rather than relying on the last plot.
# The size was 270 x 180 with units = "cm" (about 106 x 71 inches), which
# ggsave() refuses by default (limitsize = TRUE, 50 inch cap); 27 x 18 cm
# assumes millimetres were intended.
ggsave(
  "images/citations-graph-density-comparison.png",
  plot = combined_density_plot,
  width = 27,
  height = 18,
  units = "cm",
  dpi = 300
)
```