> cor(states) Murder Population Illiteracy Income Frost Murder 1.0000000...> fit1 <- lm(Murder ~ Population + Illiteracy + Income + Frost, data = states) > fit2 <- lm(Murder ~...~ Population + Illiteracy Model 2: Murder ~ Population + Illiteracy + Income + Frost Res.Df RSS...1 236.196 525.38 123.605 Step: AIC=93.76 Murder ~ Population + Illiteracy Df Sum...", "Frost")]) > leaps <- regsubsets(Murder ~ Population + Illiteracy + Income + Frost, data = states,
所谓嵌套模型,即它的一 些项完全包含在另一个模型中 用anova()函数比较 > states<-as.data.frame(state.x77[,c("Murder","Population","Illiteracy...","Income","Frost")]) > fit1<-lm(Murder~Population+Illiteracy+Income+Frost,data=states) >fit2<-lm(Murder...+ Illiteracy Model 2: Murder ~ Population + Illiteracy + Income +Frost Res.Df RSS Df Sum of Sq...1 144.264 433.43 115.986 Step: AIC=95.75 Murder ~ Population +Illiteracy + Income Df Sum...: (Intercept) Population Illiteracy 1.6515497 0.0002242 4.0807366 2.
Grad, Frost, abb, and are. c) Add a variable to the data frame which should categorize the level of illiteracy...: [0,1) is low, [1,2) is some, [2, inf) is high. d) Find out which state from the west, with low illiteracy...< 1,"Low Illiteracy", ifelse(dfa$Illiteracy >= 1 & dfa$Illiteracy < 2, "Some Illiteracy...", "High Illiteracy") ) # Or: dfa$illi <- cut(dfa$Illiteracy,...", "Some Illiteracy", "High Illliteracy")) # d) sub <- subset(dfa, illi == "Low <em>Illiteracy</em>" & reg =
多元线性回归 states = as.data.frame(state.x77[,c("Murder","Population", "Illiteracy...lty.smooth=2, main="Scatter Plot Matrix") fit = lm(Murder~Population+Illiteracy...effect("hp:wt",fit,xlevels=list(wt=c(2.2,3.2,4.2))),multiline=TRUE) #回归推断 fit = lm(Murder~Population+Illiteracy...lm(weight~height+I(height^2),data=women[-c(13,15),]) plot(newfit) #又一个例子 fit = lm(Murder~Population+Illiteracy...Income+Frost, data=states) plot(fit) #改进的方法,更加推荐 #正态性 library(car) fit = lm(Murder~Population+Illiteracy
summarize the model and make suggestions on the control of crime rate ## Population Income Illiteracy...Consider the marginal and bivariate distributions ## Population Income Illiteracy Life Exp...variables, plot the scatter plot between the different variables ## Population Income Illiteracy...1.880e-04 6.474e-05 2.905 0.00584 ** ## Income -1.592e-04 5.725e-04 -0.278 0.78232 ## Illiteracy...1.202e+02 1.718e+01 6.994 1.17e-08 ***## Population 1.780e-04 5.930e-05 3.001 0.00442 ** ## Illiteracy
> states <- state.x77[, 1:6] > cov(states) Population Income Illiteracy Life Exp Murder...3551.509551 Income 571229.7796 377573.3061 -163.7020408 280.6631837 -521.894286 3076.768980 Illiteracy...3076.7690 -3.2354694 6.3126849 -14.549616 65.237894 > cor(states) Population Income Illiteracy...0.3436428 -0.09848975 Income 0.20822756 1.0000000 -0.4370752 0.34025534 -0.2300776 0.61993232 Illiteracy
检测多重共线性 > library(car) > vif(fit) Population Illiteracy Income Frost 1.2 2.2 1.3 2.1 > sqrt(vif(...Population Illiteracy Income Frost FALSE FALSE FALSE FALSE 结果表明预测变量不存在多重共线性问题。
fitted(fit2)) 8.2.4多元线性回归 > library(car) > states<-as.data.frame(state.x77[,c("Murder","Population","Illiteracy...1.0000000 0.3436428 0.7029752 -0.2300776 Population 0.3436428 1.0000000 0.1076224 0.2082276 Illiteracy...-0.3321525 -0.6719470 0.2262822 Frost Murder -0.5388834 Population -0.3321525 Illiteracy
> states <- state.x77[, 1:6] > cov(states) Population Income Illiteracy Life...571229.7796 292.8679592 -407.8424612 Income 571229.7796 377573.3061 -163.7020408 280.6631837 Illiteracy...0.48797102 HS Grad 1.00000000 > cor(states, method='spearman') Population Income Illiteracy...0.3130496 -0.1040171 0.3457401 Income 0.1246098 1.0000000 -0.3145948 0.3241050 -0.2174623 Illiteracy...-0.6545396 0.5239410 -0.4367330 HS Grad Population -0.3833649 Income 0.5104809 Illiteracy
head(UScrime,3) t.test(Prob~So,data=UScrime) ## 4.非参数多组比较 class <- state.region var <- state.x77[,c("Illiteracy
下面的例子为用州的人口和文盲率来预测谋杀率,对模型进行了Box-Tidwell变换: > boxTidwell(Murder~Population+Illiteracy,data=states)...Score Statistic p-value MLE of lambda Population -0.3228003 0.7468465 0.8693882 Illiteracy
> states<-as.data.frame(cbind(state.region,state.x77)) > kruskal.test(Illiteracy~state.region,data=states...) Kruskal-Wallis rank sum test data: Illiteracy by state.region Kruskal-Wallis chi-squared =
谋杀率案例 states <- as.data.frame(state.x77) colnames(states) fit <- lm(Murder ~ Population + Income + Illiteracy
分组计算示例 > aggregate(state.x77, list(Region = state.region), mean) Region Population Income Illiteracy...Cold = state.x77[,"Frost"] > 130), + mean) Region Cold Population Income Illiteracy
> rm(list=ls()) > states <- data.frame(state.region,state.x77) > means <- aggregate(states$Illiteracy...West 1.023077 2 South 1.737500 > barplot(means$x, names.agr=means$Group.1) > title(“Mean Illiteracy
> states <- data.frame(state.region, state.x77) > means <- aggregate(states$Illiteracy, by=list(state.region...West 1.023077 2 South 1.737500 > barplot(means$x, names.arg = means$Group.1) > title("Mean Illiteracy
WY > state["Alabama",] state.name state.abb state.region Population Income Illiteracy Life.Exp
state_data = state_data_0.copy() state_data 这个数据表是美国几个州的统计数据,每一行代表一个州,每一列分别是人口(Population)、收入(Income)、受教育程度(Illiteracy
> states<-as.data.frame(state.x77) > set.seed(1234) >spearman_test(Illiteracy~Murder,data=states,distribution
领取专属 10元无门槛券
手把手带您无忧上云