# 【学习】【R语言读书会】《R实战》读书笔记（第七章）

1 描述统计

2 频次和相依表

3 相关系数和协方差

4 t-检验

5 非参数统计

1 描述统计

> vars <- c(“mpg”, “hp”, “wt”)

mpg hp wt

Mazda RX4 21.0 110 2.620

Mazda RX4 Wag 21.0 110 2.875

Datsun 710 22.8 93 2.320

Hornet 4 Drive 21.4 110 3.215

Valiant 18.1 105 3.460

> summary(mtcars[vars])

mpg hp wt

Min. :10.40 Min. : 52.0 Min. :1.513

1st Qu.:15.43 1st Qu.: 96.5 1st Qu.:2.581

Median :19.20 Median :123.0 Median :3.325

Mean :20.09 Mean :146.7 Mean :3.217

3rd Qu.:22.80 3rd Qu.:180.0 3rd Qu.:3.610

Max. :33.90 Max. :335.0 Max. :5.424

> mystats <- function(x, na.omit=FALSE) {

+ if(na.omit)

+ x <- x[!is.na(x)]

+ m <- mean(x)

+ n <- length(x)

+ s <- sd(x)

+ skew <- sum((x-m)^3/s^3) / n

+ kurt <- sum((x-m)^4/s^4) / n - 3

+ return(c(n=n, mean=m, stdev=s, skew=skew, kurtosis=kurt))

+ }

> sapply(mtcars[vars], mystats)

mpg hp wt

n 32.000000 32.0000000 32.00000000

mean 20.090625 146.6875000 3.21725000

stdev 6.026948 68.5628685 0.97845744

skew 0.610655 0.7260237 0.42314646

kurtosis -0.372766 -0.1355511 -0.02271075

> apply(mtcars[vars], 2, mystats)

mpg hp wt

n 32.000000 32.0000000 32.00000000

mean 20.090625 146.6875000 3.21725000

stdev 6.026948 68.5628685 0.97845744

skew 0.610655 0.7260237 0.42314646

kurtosis -0.372766 -0.1355511 -0.02271075

> library(Hmisc)

format.pval, round.POSIXt, trunc.POSIXt, units

> describe(mtcars[vars])

mtcars[vars]

3 Variables 32 Observations

—————————————————————————————————————————

mpg

n missing unique Mean .05 .10 .25 .50 .75 .90 .95

32 0 25 20.09 12.00 14.34 15.43 19.20 22.80 30.09 31.30

lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9

—————————————————————————————————————————

hp

n missing unique Mean .05 .10 .25 .50 .75 .90 .95

32 0 22 146.7 63.65 66.00 96.50 123.00 180.00 243.50 253.55

lowest : 52 62 65 66 91, highest: 215 230 245 264 335

—————————————————————————————————————————

wt

n missing unique Mean .05 .10 .25 .50 .75 .90 .95

32 0 29 3.217 1.736 1.956 2.581 3.325 3.610 4.048 5.293

lowest : 1.513 1.615 1.835 1.935 2.140, highest: 3.845 4.070 5.250 5.345 5.424

———————————————————————————————————————–

> library(psych)

describe

> describe(mtcars[vars])

vars n mean sd median trimmed mad min max range skew kurtosis se

mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61 -0.37 1.07

hp 2 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73 -0.14 12.12

wt 3 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42 -0.02 0.17

> aggregate(mtcars[vars], by=list(am=mtcars\$am), mean)

am mpg hp wt

1 0 17.14737 160.2632 3.768895

2 1 24.39231 126.8462 2.411000

> aggregate(mtcars[vars], by=list(am=mtcars\$am), sd)

am mpg hp wt

1 0 3.833966 53.90820 0.7774001

2 1 6.166504 84.06232 0.6169816

by(data, INDICES, FUN)

2 频次和相依表

> library(vcd)

> ?Arthritis

starting httpd help server … done

ID Treatment Sex Age Improved

1 57 Treated Male 27 Some

2 46 Treated Male 29 None

3 77 Treated Male 30 None

4 17 Treated Male 32 Marked

5 36 Treated Male 46 Marked

6 23 Treated Male 58 Marked

> mytable <- with(Arthritis, table(Improved))

> mytable

Improved

None Some Marked

42 14 28

> prop.table(mytable)

Improved

None Some Marked

0.5000000 0.1666667 0.3333333

> prop.table(mytable) * 100

Improved

None Some Marked

50.00000 16.66667 33.33333

> mytable1 <- xtabs(~ Treatment + Improved, data=Arthritis)

> mytable1

Improved

Treatment None Some Marked

Placebo 29 7 7

Treated 13 7 21

> margin.table(mytable1, 1)

Treatment

Placebo Treated

43 41

> margin.table(mytable1, 2)

Improved

None Some Marked

42 14 28

> prop.table(mytable1, 1)

Improved

Treatment None Some Marked

Placebo 0.6744186 0.1627907 0.1627907

Treated 0.3170732 0.1707317 0.5121951

Improved

Treatment None Some Marked Sum

Placebo 29 7 7 43

Treated 13 7 21 41

Sum 42 14 28 84

R提供了很多函数进行独立检验工作。

> chisq.test(mytable1)

Pearson’s Chi-squared test

data: mytable1

X-squared = 13.055, df = 2, p-value = 0.001463

> fisher.test(mytable1)

Fisher’s Exact Test for Count Data

data: mytable1

p-value = 0.001393

alternative hypothesis: two.sided

> assocstats(mytable1)

X^2 df P(> X^2)

Likelihood Ratio 13.530 2 0.0011536

Pearson 13.055 2 0.0014626

Phi-Coefficient : 0.394

Contingency Coeff.: 0.367

Cramer’s V : 0.394

cor函数，形式如下。

cor(x, use= , method=)

> states <- state.x77[, 1:6]

> cov(states)

Population Income Illiteracy Life Exp Murder HS Grad

Population 19931683.7588 571229.7796 292.8679592 -407.8424612 5663.523714 -3551.509551

Income 571229.7796 377573.3061 -163.7020408 280.6631837 -521.894286 3076.768980

Illiteracy 292.8680 -163.7020 0.3715306 -0.4815122 1.581776 -3.235469

Life Exp -407.8425 280.6632 -0.4815122 1.8020204 -3.869480 6.312685

Murder 5663.5237 -521.8943 1.5817755 -3.8694804 13.627465 -14.549616

HS Grad -3551.5096 3076.7690 -3.2354694 6.3126849 -14.549616 65.237894

> cor(states)

Population Income Illiteracy Life Exp Murder HS Grad

Population 1.00000000 0.2082276 0.1076224 -0.06805195 0.3436428 -0.09848975

Income 0.20822756 1.0000000 -0.4370752 0.34025534 -0.2300776 0.61993232

Illiteracy 0.10762237 -0.4370752 1.0000000 -0.58847793 0.7029752 -0.65718861

Life Exp -0.06805195 0.3402553 -0.5884779 1.00000000 -0.7808458 0.58221620

Murder 0.34364275 -0.2300776 0.7029752 -0.78084575 1.0000000 -0.48797102

HS Grad -0.09848975 0.6199323 -0.6571886 0.58221620 -0.4879710 1.00000000

cor.test(x, y, alternative=, method)

> cor.test(states[,3],states[,5])

Pearson’s product-moment correlation

data: states[, 3] and states[, 5]

t = 6.8479, df = 48, p-value = 1.258e-08

alternative hypothesis: true correlation is not equal to 0

95 percent confidence interval:

0.5279280 0.8207295

sample estimates:

cor

0.7029752

t检验

1 描述统计的内容，频次和相依表

2 协方差和相关系数

3 各种检验经验方法（t检验和非参数检验）

Resource

1 http://www.wangluqing.com/2014/06/r-in-action-note9/

2《R实战》英文版第二部分第七章

2122 篇文章103 人订阅

0 条评论

## 相关文章

791

831

48610

64912

3368

1813

3749

2K4

4188

### O(1)效率的表面模糊算法优化。

很久没有写文章了，主要是最近一段时间没有以前那么多空暇空间，内存和CPU占用率一致都很高，应前几日群里网友的要求，今天发个表面模糊的小程序来找回之前...

2466