I have this graph network in R:
library(igraph)
n_rows <- 10
n_cols <- 5
g <- make_lattice(dimvector = c(n_cols, n_rows))
layout <- layout_on_grid(g, width = n_cols)
n_nodes <- vcount(g)
node_colors <- rep("white", n_nodes)
for (row in 0:(n_rows-1)) {
start_index <- row * n_cols + 1
node_colors[start_index:(start_index+2)] <- "orange"
node_colors[(start_index+3):(start_index+4)] <- "purple"
}
node_labels <- 1:n_nodes
plot(g,
layout = layout,
vertex.color = node_colors,
vertex.label = node_labels,
vertex.label.color = "black",
vertex.size = 15,
edge.color = "gray",
main = "Rectangular Undirected Network")
I am trying to write a function which randomly breaks this network into 5 connected subgraphs (i.e. mini graphs) such that each node appears exactly once.
I think in theory, this should not be too difficult to do. I would need to randomly identify a node, randomly decide how many neighbors to include, select those neighbors and remove them from the graph .... and restart this process on the remaining graph. Of course, some additional details would need to be specified, e.g. if the random number specified exceed the number of remaining nodes then use a max function, BFS would need to be used to select the nodes, etc.
Here was my first attempt at writing the code:
get_connected_subgraph <- function(graph, available_nodes, min_nodes = 5, max_nodes = 15) {
if (length(available_nodes) == 0) return(NULL)
start_node <- sample(available_nodes, 1)
bfs_result <- bfs(graph, root = start_node, unreachable = FALSE, order = TRUE, rank = TRUE, father = TRUE)
bfs_order <- intersect(bfs_result$order, available_nodes)
n_subgraph_nodes <- min(sample(min_nodes:max_nodes, 1), length(bfs_order))
subgraph_nodes <- bfs_order[1:n_subgraph_nodes]
return(subgraph_nodes)
}
create_5_subgraphs <- function(graph) {
available_nodes <- V(graph)
subgraphs <- list()
for (i in 1:5) {
subgraph_nodes <- get_connected_subgraph(graph, available_nodes)
if (is.null(subgraph_nodes)) break
subgraphs[[i]] <- subgraph_nodes
available_nodes <- setdiff(available_nodes, subgraph_nodes)
}
return(subgraphs)
}
set.seed(42)
subgraphs <- create_5_subgraphs(g)
subgraph_colors <- c("red", "blue", "green", "yellow", "purple")
node_subgraph_colors <- rep("lightgray", vcount(g))
for (i in 1:length(subgraphs)) {
node_subgraph_colors[subgraphs[[i]]] <- subgraph_colors[i]
}
edge_subgraph_colors <- rep("lightgray", ecount(g))
for (i in 1:length(subgraphs)) {
subgraph_edges <- E(g)[.inc(subgraphs[[i]])]
edge_subgraph_colors[subgraph_edges] <- subgraph_colors[i]
}
plot(g,
layout = layout,
vertex.color = node_subgraph_colors,
vertex.label = node_labels,
vertex.label.color = "black",
vertex.size = 15,
edge.color = edge_subgraph_colors,
edge.width = 2,
main = "Network with 5 Separate Connected Subgraphs")
The above result looks almost correct, but the yellow nodes (e.g. 29) appears to be violating the connectivity.
Any pointers on how to fix this?
I wrote some optional code to compare the before/after:
node_info <- data.frame(
Node_Index = 1:vcount(g),
Original_Color = node_colors,
New_Color = node_subgraph_colors
)
get_subgraph_number <- function(node) {
subgraph_num <- which(sapply(subgraphs, function(x) node %in% x))
if (length(subgraph_num) == 0) return(NA)
return(subgraph_num)
}
node_info$Subgraph_Number <- sapply(node_info$Node_Index, get_subgraph_number)
head(node_info)
To complement jblood94's amazing answer, here is a quick plotting function that works with jblood94's answer:
library(igraph)
library(data.table)
f <- function(g, n) {
m <- length(g)
dt <- setDT(as_data_frame(g))
dt <- rbindlist(list(dt, dt[,.(from = to, to = from)]))
dt[,group := 0L]
used <- logical(m)
s <- sample(m, n)
used[s] <- TRUE
m <- m - n
dt[from %in% s, group := .GRP, from]
while (m) {
dt2 <- unique(
dt[group != 0L & !used[to], .(grow = to, onto = group)][sample(.N)],
by = "grow"
)
dt[dt2, on = .(from = grow), group := onto]
used[dt2[[1]]] <- TRUE
m <- m - nrow(dt2)
}
unique(dt[,to := NULL])[,.(vertices = .(from)), group]
}
plot_multiple_subgraphs <- function(n_plots = 25, n_rows = 10, n_cols = 5, n_subgraphs = 5) {
g <- make_lattice(dimvector = c(n_cols, n_rows))
layout <- layout_on_grid(g, width = n_cols)
n_nodes <- vcount(g)
color_palette <- c("red", "blue", "green", "yellow", "purple")
par(mfrow = c(5, 5), mar = c(0.5, 0.5, 2, 0.5))
for (i in 1:n_plots) {
subgraphs <- f(g, n_subgraphs)
node_colors <- rep("white", n_nodes)
for (j in 1:nrow(subgraphs)) {
nodes <- unlist(subgraphs$vertices[j])
node_colors[nodes] <- color_palette[j]
}
plot(g,
layout = layout,
vertex.color = node_colors,
vertex.label = NA,
vertex.size = 15,
edge.color = "gray",
edge.width = 0.5,
main = paste("Partition", i),
cex.main = 0.8)
}
}
plot_multiple_subgraphs()
count_components(g_sub) == 5
. I think your problem is that you're doing BFS on the original graph and taking the intersection with remaining nodes. You might first create a new graph that only has available vertices and BFS that. – Veracitymin_nodes=5
to be a restriction on the allowed subgraph partitions. – Veracity