Skip to content

Alternative plot method for factor_analysis() #432

@DominiqueMakowski

Description

@DominiqueMakowski

Current

f <- parameters::factor_analysis(mtcars, n=3)
plot(f)
Image

Alternative: graph

plot_graph()
Image

Suggestion: perhaps this could be added as an alternative plot method called if plot(fa_rez, graph=TRUE)

Code

# Load the required packages
library(parameters)
library(datawizard) 
library(tidygraph)
library(ggraph)
library(ggplot2)


plot_graph <- function(fa_res, 
                       threshold = 0.3, 
                       loading_text_size = 2.8,
                       arrow_end_gap = 0.10,         
                       factor_node_size = c(22, 35),
                       expand = c(0.5, 0.5),
                       names_factors = NULL,
                       color_variables = "#95A5A6",
                       color_factors = "#2C3E50"
) {
  
  meta_cols <- c("Complexity", "Uniqueness", "MSA", "Mean", "SD")
  
  # Helper function to process colors and reorder nodes
  process_colors_and_order <- function(items, color_input, default_color) {
    if (is.null(color_input)) {
      return(list(items = items, colors = rep(default_color, length(items))))
    }
    
    if (is.list(color_input)) color_input <- unlist(color_input)
    
    # 1. Handle named list/vector (e.g., c("Var3" = "red", "Var1" = "blue"))
    if (!is.null(names(color_input))) {
      input_names <- names(color_input)
      
      # Match against existing nodes
      valid_names <- input_names[input_names %in% items]
      missing_items <- setdiff(items, valid_names)
      
      # Reorder: Listed items first, missing items follow
      ordered_items <- c(valid_names, missing_items)
      
      # Assign colors based on new order
      mapped_colors <- color_input[ordered_items]
      mapped_colors[is.na(mapped_colors)] <- default_color # Fallback for missing
      
      return(list(items = ordered_items, colors = unname(mapped_colors)))
    }
    
    # 2. Handle single color value
    if (length(color_input) == 1) {
      return(list(items = items, colors = rep(color_input, length(items))))
    }
    
    # 3. Handle unnamed vector of matching length
    if (length(color_input) == length(items)) {
      return(list(items = items, colors = color_input))
    }
    
    # Fallback
    warning("Color vector length does not match number of nodes. Using default color.")
    return(list(items = items, colors = rep(default_color, length(items))))
  }
  
  
  # 1. Extract ALL loadings first
  df_all <- fa_res |>
    as.data.frame() |>
    data_remove(meta_cols) |>
    data_to_long(
      select = -Variable,
      names_to = "Factor",
      values_to = "Loading"
    )
  
  # Process Variables (Color & Order)
  var_processed <- process_colors_and_order(unique(df_all$Variable), color_variables, "#95A5A6")
  variables <- var_processed$items
  var_colors <- var_processed$colors
  n_var <- length(variables)
  
  # Process Factors (Color & Order)
  fac_processed <- process_colors_and_order(unique(df_all$Factor), color_factors, "#2C3E50")
  original_factors <- fac_processed$items
  fac_colors <- fac_processed$colors
  n_fac <- length(original_factors)
  
  # Process Custom Factor Names for Labels
  display_factors <- original_factors
  
  if (!is.null(names_factors)) {
    if (!is.null(names(names_factors))) {
      if (any(unlist(names_factors) %in% original_factors)) {
        lookup <- setNames(names(names_factors), unlist(names_factors))
      } else {
        lookup <- setNames(unlist(names_factors), names(names_factors))
      }
      matched <- original_factors %in% names(lookup)
      display_factors[matched] <- lookup[original_factors[matched]]
      
    } else if (length(names_factors) == n_fac) {
      display_factors <- unlist(names_factors)
    } else {
      warning("`names_factors` must be named, or match the exact number of factors. Ignoring custom names.")
    }
  }
  
  # 2. Extract Variance Explained
  var_attr <- attributes(fa_res)$variance
  
  if (!is.null(var_attr)) {
    if (is.numeric(var_attr)) {
      prop_var <- var_attr
    } else if (is.data.frame(var_attr) && "Variance" %in% names(var_attr)) {
      prop_var <- var_attr$Variance
    }
    
    if (is.null(names(prop_var)) && length(prop_var) >= n_fac) {
      # Names map to the reordered list
      names(prop_var) <- original_factors 
    }
  } else {
    ss_loadings <- tapply(df_all$Loading^2, df_all$Factor, sum)
    prop_var <- as.numeric(ss_loadings / n_var)
    names(prop_var) <- names(ss_loadings)
  }
  
  if (max(prop_var, na.rm = TRUE) > 1) {
    prop_var <- prop_var / 100
  }
  
  # 3. Filter by threshold and reshape
  edges <- df_all |>
    subset(abs(Loading) >= threshold) |>
    data_rename(
      pattern = c("Variable", "Factor", "Loading"),
      replacement = c("from", "to", "weight")
    )
  
  # 4. Build a Manual Layout
  # Variables map to coordinates decreasing from n_var -> 1 (Places first item at the top)
  y_var <- seq(n_var, 1)
  
  y_fac <- seq(
    from = n_var - 0.5, 
    to = 1.5, 
    length.out = n_fac
  )
  
  nodes <- data.frame(
    name = c(original_factors, variables),
    type = c(rep("Factor", n_fac), rep("Variable", n_var)),
    x = c(rep(1, n_fac), rep(0, n_var)),
    y = c(y_fac, y_var),
    variance = c(prop_var[original_factors], rep(NA, n_var)),
    label_text = c(
      sprintf("%s\n(%.1f%%)", display_factors, prop_var[original_factors] * 100), 
      variables
    ),
    node_color = c(fac_colors, var_colors) # Append our mapped colors
  )
  
  # 5. Build the tidygraph object
  graph <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)
  
  # 6. Plot using ggraph
  ggraph(graph, layout = "manual", x = x, y = y) + 
    
    # -- EDGES --
    geom_edge_link(
      aes(
        edge_width = abs(weight),
        edge_alpha = abs(weight),
        color = weight,
        label = sub("^(-?)0\\.", "\\1.", sprintf("%.2f", weight))
      ),
      arrow = arrow(length = unit(4, 'mm'), type = "closed"),
      start_cap = circle(0, 'mm'),    
      end_cap = circle(arrow_end_gap, 'snpc'), 
      angle_calc = 'along',
      label_dodge = unit(2.5, 'mm'),  
      label_size = loading_text_size
    ) +
    
    # -- FACTOR NODES --
    geom_node_point(
      aes(filter = type == "Factor", size = variance, fill = node_color),
      shape = 21,
      color = "white",
      stroke = 1.5,
      show.legend = FALSE
    ) +
    
    # -- FACTOR TEXT --
    geom_node_text(
      aes(filter = type == "Factor", label = label_text),
      color = "white",
      fontface = "bold",
      size = 3.5,
      lineheight = 0.9 
    ) +
    
    # -- VARIABLE NODES --
    geom_node_label(
      aes(filter = type == "Variable", label = label_text, fill = node_color),
      color = "white",
      fontface = "bold",
      size = 3.5,
      hjust = 1, 
      label.padding = unit(0.5, "lines"),
      show.legend = FALSE
    ) +
    
    # -- SCALES & AESTHETICS --
    scale_fill_identity() + # Evaluates our hex colors natively
    scale_size_continuous(range = factor_node_size, guide = "none") +
    scale_edge_color_gradient2(
      low = "#E74C3C", mid = "grey85", high = "#2ECC71",
      midpoint = 0, guide = "none" 
    ) +
    scale_edge_width_continuous(range = c(0.5, 2.5), guide = "none") +
    scale_edge_alpha_continuous(range = c(0.4, 1), guide = "none") +
    
    # -- CANVAS EXPANSION & THEME --
    scale_x_continuous(expand = expansion(add = expand)) +
    coord_cartesian(clip = "off") + 
    
    theme_graph() +
    theme(
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      plot.margin = margin(20, 20, 20, 20) 
    ) +
    labs(title = "Factor Analysis Loadings")
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions