1. 理论推导

2. 模拟

```r
set.seed(1)
# Fixed hold-out set: `test_size` rows of 35 independent N(0, 1) features.
# A row is labelled 1 when the sum of its features is positive, 0 otherwise.
test_size <- 50
sigma <- 0.5  # assigned here; not referenced by the statements below
test_x <- matrix(
  rnorm(n = test_size * 35, mean = 0, sd = 1),
  nrow = test_size
)
test_y <- as.numeric(rowSums(test_x) > 0)
```

```r
pnum <- 35
# Monte-Carlo study of lasso training/test error versus model size.
#
# For each run a fresh training set is drawn, a lasso path is fitted, and the
# mean absolute error of the (gaussian-family) prediction against the 0/1
# label is recorded for every model size 1..pnum, on both the training set
# and the fixed test set (`test_x`, `test_y`).
#
# Results:
#   train_error_all, test_error_all   runs x pnum matrices of errors
#   train_error_mean, test_error_mean per-model-size averages over the runs
library(glmnet)

n_sim <- 100                # number of simulated training sets
n_train <- 50               # observations per training set
n_features <- ncol(test_x)  # training design must match the test design

# Both matrices start as NA so an unfilled cell can never pass for a real
# error value (the original mixed NaN and 0 initialisation).
train_error_all <- matrix(NA_real_, n_sim, pnum)
test_error_all <- matrix(NA_real_, n_sim, pnum)

for (i in seq_len(n_sim)) {
  # Redraw the training set until the lasso path contains every model size
  # from 1 to pnum, so that each column of the result matrices gets a value.
  repeat {
    train_x <- matrix(rnorm(n_train * n_features, 0, 1), nrow = n_train)
    train_y <- ifelse(rowSums(train_x) > 0, 1, 0)

    lasso <- glmnet(train_x, train_y, alpha = 1, nlambda = 10000,
                    family = "gaussian", pmax = pnum)
    lambdas <- data.frame(df = lasso$df, lambda = lasso$lambda)

    # One representative lambda per model size: the mean of all path lambdas
    # that produced that number of non-zero coefficients.
    ld <- aggregate(lambdas, by = list(lambdas$df), mean)

    # Drop the empty model explicitly instead of relying on row position.
    ld <- ld[ld$df > 0, ]
    if (nrow(ld) == pnum) {
      break
    }
  }

  # Refit on the representative lambdas and predict at exactly those values;
  # the prediction columns follow the order of `ld$lambda`.
  lasso1 <- glmnet(train_x, train_y, alpha = 1, lambda = ld$lambda,
                   family = "gaussian")
  result_train <- predict(lasso1, newx = train_x, type = "response",
                          s = ld$lambda)
  result_test <- predict(lasso1, newx = test_x, type = "response",
                         s = ld$lambda)

  # Mean absolute deviation between the prediction and the 0/1 label.
  train_error <- colMeans(abs(result_train - train_y))
  test_error <- colMeans(abs(result_test - test_y))

  train_error_all[i, ld$df] <- train_error
  test_error_all[i, ld$df] <- test_error

  print(i)  # progress indicator
}

train_error_mean <- colMeans(train_error_all)
test_error_mean <- colMeans(test_error_all)
```

```r
g1 <- ggplot()
# Animated view of the simulation: frame i shows the error curves of runs
# 1..i (light colours) with the across-run averages drawn on top (bold).
# `g1` must already hold the empty ggplot() canvas; saveGIF() comes from the
# 'animation' package and is called with its default output settings.
library(animation)

n_runs <- nrow(train_error_all)
model_size <- seq_len(ncol(train_error_all))

# The bold curves are the averages over all runs, so they do not depend on
# the frame and are built once. (The original used the last run's
# `train_error` / `test_error` here despite the "average" label.)
train_all <- data.frame(
  error = train_error_mean,
  num = model_size,
  type = "average(train_error)"
)
test_all <- data.frame(
  error = test_error_mean,
  num = model_size,
  type = "average(test_error)"
)

saveGIF({
  for (i in seq_len(n_runs)) {
    print(i)  # progress indicator

    train_data <- data.frame(
      error = train_error_all[i, ],
      num = model_size,
      type = "train_error"
    )
    test_data <- data.frame(
      error = test_error_all[i, ],
      num = model_size,
      type = "test_error"
    )

    # Layers accumulate in g1 across iterations; the averages are re-added
    # after the new run so they stay visible above the light curves.
    g1 <- g1 +
      geom_line(data = train_data, aes(x = num, y = error),
                lwd = 1, colour = "lightblue") +
      geom_line(data = test_data, aes(x = num, y = error),
                lwd = 1, colour = "lightpink") +
      geom_line(data = train_all, aes(x = num, y = error),
                lwd = 2, colour = "blue") +
      geom_line(data = test_all, aes(x = num, y = error),
                lwd = 2, colour = "red")

    print(g1)  # emit the current frame
  }
})
```

```r
# Static summary figure: every run's training (light blue) and test (light
# pink) error curve, with the across-run averages overlaid in bold.
# One plotting region is opened and curves are added with lines(), so the
# axes are drawn once instead of being redrawn by every plot() call.
n_runs <- nrow(train_error_all)
model_size <- seq_len(pnum)

plot(NULL, xlim = c(0, pnum), ylim = c(0, 0.6),
     xlab = "Model Complexity (df)", ylab = "Prediction Error")

for (i in seq_len(n_runs)) {
  lines(model_size, train_error_all[i, ], col = "lightblue")
  lines(model_size, test_error_all[i, ], col = "lightpink")
}

# Bold curves use the averages over all runs (the original plotted the last
# run's `train_error` / `test_error` here).
lines(model_size, train_error_mean, col = "blue", lwd = 2)
lines(model_size, test_error_mean, col = "red", lwd = 2)
box()
```

1. 虽然之前提到了方差-偏差分解，但模拟过程中其实并没有用到它：为了分析方便，这里计算的只是总的误差。下一篇会通过方差-偏差分解来更细致地分析误差。

2. 之前提到的方差-偏差分解并不一定成立：只有在用均方误差度量模型误差时才成立，如果使用 0-1 误差等其他度量方法，就不再成立。

1. Hastie T, Tibshirani R, Friedman J. The Elements of Statistical Learning: Data Mining, Inference, and Prediction[M]. 2nd ed. New York: Springer, 2009.

2. 周志华. 机器学习[M]. 北京: 清华大学出版社, 2016.

