下面的for循环在igraph图中对节点进行迭代。这里有2048个,所以速度很慢。我试图尽可能高效地编写代码(例如,不增加向量)。如何使循环运行得更快?
编辑:我还考虑过通过Rcpp用C++编写这篇文章。我只是不知道在那种情况下我该怎么用in。
编辑2: compatible_models实际上依赖于child_node。我在这里给出的是一个例子,说明它对于child_node的特定值可能是什么。
library(igraph)
library(Metrics)
set.seed(1234)
N <- 10000
A <- rnorm(N, 10, 2)
B <- rnorm(N, 9, 2)
C <- rnorm(N, 12, 1)
D <- rnorm(N, 7, 3)
Y <- A + B + A*B + D + A^2 + rnorm(N)
data <- data.frame(Y = Y, A = A, B = B, C = C, D = D)
partition <- sort(sample(N, 0.7*N))
data_train <- data[partition, ]
data_test <- data[-partition, ]
g <- make_empty_graph()
g <- g + vertices(1:2049)
generate_edges <- function(start_vertex, end_vertices) {
edges <- c()
for (i in 1:length(end_vertices)) {
edges <- c(edges, start_vertex, end_vertices[i])
}
return(edges)
}
outward_edges <- generate_edges(V(g)[1], V(g)[2:vcount(g)])
g <- g + edges(outward_edges, attr1 = rep(0, length(outward_edges) / 2), attr2 = rep(0, length(outward_edges) / 2))
successors <- matrix(nrow = length(g[[1, ]][[1]]), ncol = 9) # nrow = number of successors of node 1
i <- 1
for (child_node in 2:2049) {
# compatible_models <- lapply(...) # suppose this is a list of "formula" objects
# like:
compatible_models <- list(Y ~ A + B + C, Y ~ I(A^2) + B + C + D, Y ~ B + D)
compatible_models <- lapply(compatible_models, lm, data = data_train)
predictions <- sapply(compatible_models, predict, newdata = data_test)
successors[i, 1:3] <- c(edge_attr(g, name = "attr1", g[[1, V(g)[child_node], edges = TRUE]]), edge_attr(g, name = "attr2", g[[1, V(g)[child_node], edges = TRUE]]),
sum(apply(predictions, 2, rmse, actual = data_test$Y))/length(compatible_models))
i <- i + 1
}
发布于 2022-07-21 06:28:03
如果我错了,请纠正我,但我认为您可以在循环之外计算前三行(或构建模型对象的任何行,但不计算任何代码),这将使我的机器上的代码的性能提高两倍:
successors <- matrix(nrow = length(g[[1, ]][[1]]), ncol = 9) # nrow = number of successors of node 1
i <- 1
start_time <- Sys.time()
for (child_node in 2:2049) {
# build models inside loop:
compatible_models <- list(Y ~ A + B + C, Y ~ I(A^2) + B + C + D, Y ~ B + D)
compatible_models <- lapply(compatible_models, lm, data = data_train)
predictions <- sapply(compatible_models, predict, newdata = data_test)
successors[i, 1:3] <- c(edge_attr(g, name = "attr1", g[[1, V(g)[child_node], edges = TRUE]]), edge_attr(g, name = "attr2", g[[1, V(g)[child_node], edges = TRUE]]),
sum(apply(predictions, 2, rmse, actual = data_test$Y))/length(compatible_models))
i <- i + 1
}
Sys.time()-start_time
#Time difference of 26.69914 secs
在循环之外创建模型的优化代码:
## model building:
compatible_models <- list(Y ~ A + B + C, Y ~ I(A^2) + B + C + D, Y ~ B + D)
compatible_models <- lapply(compatible_models, lm, data = data_train)
predictions <- sapply(compatible_models, predict, newdata = data_test)
## initialisation:
successors2 <- matrix(nrow = length(g[[1, ]][[1]]), ncol = 9)
i <- 1
start_time <- Sys.time()
for (child_node in 2:2049) {
successors2[i, 1:3] <- c(edge_attr(g, name = "attr1", g[[1, V(g)[child_node], edges = TRUE]]), edge_attr(g, name = "attr2", g[[1, V(g)[child_node], edges = TRUE]]),
sum(apply(predictions, 2, rmse, actual = data_test$Y))/length(compatible_models))
i <- i + 1
}
Sys.time()-start_time
#Time difference of 8.885826 secs
all.equal(successors,successors2)
# [1] TRUE
https://stackoverflow.com/questions/73065833
复制相似问题