# Load the required packages
library(parameters)
library(datawizard)
library(tidygraph)
library(ggraph)
library(ggplot2)
plot_graph <- function(fa_res,
threshold = 0.3,
loading_text_size = 2.8,
arrow_end_gap = 0.10,
factor_node_size = c(22, 35),
expand = c(0.5, 0.5),
names_factors = NULL,
color_variables = "#95A5A6",
color_factors = "#2C3E50"
) {
meta_cols <- c("Complexity", "Uniqueness", "MSA", "Mean", "SD")
# Helper function to process colors and reorder nodes
process_colors_and_order <- function(items, color_input, default_color) {
if (is.null(color_input)) {
return(list(items = items, colors = rep(default_color, length(items))))
}
if (is.list(color_input)) color_input <- unlist(color_input)
# 1. Handle named list/vector (e.g., c("Var3" = "red", "Var1" = "blue"))
if (!is.null(names(color_input))) {
input_names <- names(color_input)
# Match against existing nodes
valid_names <- input_names[input_names %in% items]
missing_items <- setdiff(items, valid_names)
# Reorder: Listed items first, missing items follow
ordered_items <- c(valid_names, missing_items)
# Assign colors based on new order
mapped_colors <- color_input[ordered_items]
mapped_colors[is.na(mapped_colors)] <- default_color # Fallback for missing
return(list(items = ordered_items, colors = unname(mapped_colors)))
}
# 2. Handle single color value
if (length(color_input) == 1) {
return(list(items = items, colors = rep(color_input, length(items))))
}
# 3. Handle unnamed vector of matching length
if (length(color_input) == length(items)) {
return(list(items = items, colors = color_input))
}
# Fallback
warning("Color vector length does not match number of nodes. Using default color.")
return(list(items = items, colors = rep(default_color, length(items))))
}
# 1. Extract ALL loadings first
df_all <- fa_res |>
as.data.frame() |>
data_remove(meta_cols) |>
data_to_long(
select = -Variable,
names_to = "Factor",
values_to = "Loading"
)
# Process Variables (Color & Order)
var_processed <- process_colors_and_order(unique(df_all$Variable), color_variables, "#95A5A6")
variables <- var_processed$items
var_colors <- var_processed$colors
n_var <- length(variables)
# Process Factors (Color & Order)
fac_processed <- process_colors_and_order(unique(df_all$Factor), color_factors, "#2C3E50")
original_factors <- fac_processed$items
fac_colors <- fac_processed$colors
n_fac <- length(original_factors)
# Process Custom Factor Names for Labels
display_factors <- original_factors
if (!is.null(names_factors)) {
if (!is.null(names(names_factors))) {
if (any(unlist(names_factors) %in% original_factors)) {
lookup <- setNames(names(names_factors), unlist(names_factors))
} else {
lookup <- setNames(unlist(names_factors), names(names_factors))
}
matched <- original_factors %in% names(lookup)
display_factors[matched] <- lookup[original_factors[matched]]
} else if (length(names_factors) == n_fac) {
display_factors <- unlist(names_factors)
} else {
warning("`names_factors` must be named, or match the exact number of factors. Ignoring custom names.")
}
}
# 2. Extract Variance Explained
var_attr <- attributes(fa_res)$variance
if (!is.null(var_attr)) {
if (is.numeric(var_attr)) {
prop_var <- var_attr
} else if (is.data.frame(var_attr) && "Variance" %in% names(var_attr)) {
prop_var <- var_attr$Variance
}
if (is.null(names(prop_var)) && length(prop_var) >= n_fac) {
# Names map to the reordered list
names(prop_var) <- original_factors
}
} else {
ss_loadings <- tapply(df_all$Loading^2, df_all$Factor, sum)
prop_var <- as.numeric(ss_loadings / n_var)
names(prop_var) <- names(ss_loadings)
}
if (max(prop_var, na.rm = TRUE) > 1) {
prop_var <- prop_var / 100
}
# 3. Filter by threshold and reshape
edges <- df_all |>
subset(abs(Loading) >= threshold) |>
data_rename(
pattern = c("Variable", "Factor", "Loading"),
replacement = c("from", "to", "weight")
)
# 4. Build a Manual Layout
# Variables map to coordinates decreasing from n_var -> 1 (Places first item at the top)
y_var <- seq(n_var, 1)
y_fac <- seq(
from = n_var - 0.5,
to = 1.5,
length.out = n_fac
)
nodes <- data.frame(
name = c(original_factors, variables),
type = c(rep("Factor", n_fac), rep("Variable", n_var)),
x = c(rep(1, n_fac), rep(0, n_var)),
y = c(y_fac, y_var),
variance = c(prop_var[original_factors], rep(NA, n_var)),
label_text = c(
sprintf("%s\n(%.1f%%)", display_factors, prop_var[original_factors] * 100),
variables
),
node_color = c(fac_colors, var_colors) # Append our mapped colors
)
# 5. Build the tidygraph object
graph <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)
# 6. Plot using ggraph
ggraph(graph, layout = "manual", x = x, y = y) +
# -- EDGES --
geom_edge_link(
aes(
edge_width = abs(weight),
edge_alpha = abs(weight),
color = weight,
label = sub("^(-?)0\\.", "\\1.", sprintf("%.2f", weight))
),
arrow = arrow(length = unit(4, 'mm'), type = "closed"),
start_cap = circle(0, 'mm'),
end_cap = circle(arrow_end_gap, 'snpc'),
angle_calc = 'along',
label_dodge = unit(2.5, 'mm'),
label_size = loading_text_size
) +
# -- FACTOR NODES --
geom_node_point(
aes(filter = type == "Factor", size = variance, fill = node_color),
shape = 21,
color = "white",
stroke = 1.5,
show.legend = FALSE
) +
# -- FACTOR TEXT --
geom_node_text(
aes(filter = type == "Factor", label = label_text),
color = "white",
fontface = "bold",
size = 3.5,
lineheight = 0.9
) +
# -- VARIABLE NODES --
geom_node_label(
aes(filter = type == "Variable", label = label_text, fill = node_color),
color = "white",
fontface = "bold",
size = 3.5,
hjust = 1,
label.padding = unit(0.5, "lines"),
show.legend = FALSE
) +
# -- SCALES & AESTHETICS --
scale_fill_identity() + # Evaluates our hex colors natively
scale_size_continuous(range = factor_node_size, guide = "none") +
scale_edge_color_gradient2(
low = "#E74C3C", mid = "grey85", high = "#2ECC71",
midpoint = 0, guide = "none"
) +
scale_edge_width_continuous(range = c(0.5, 2.5), guide = "none") +
scale_edge_alpha_continuous(range = c(0.4, 1), guide = "none") +
# -- CANVAS EXPANSION & THEME --
scale_x_continuous(expand = expansion(add = expand)) +
coord_cartesian(clip = "off") +
theme_graph() +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
plot.margin = margin(20, 20, 20, 20)
) +
labs(title = "Factor Analysis Loadings")
}
Current
Alternative: graph
Suggestion: perhaps this could be added as an alternative plot method called if
plot(fa_rez, graph=TRUE)Code