统计计算课程笔记(一)

目录

2018 SC学习笔记(雾)

第1节

1
2
3
4
5
# Twelve evenly spaced values from 1 to 100.
a = seq(1, 100, length.out = 12)
a
# Element-wise comparison: one TRUE/FALSE per element of a.
a < 20
# Logical subsetting keeps the elements where the mask is TRUE.
a[a > 20]
# An equality test needs `==`; `a[a = 20]` is not a comparison (the single
# `=` only names an argument of the subsetting call).
a[a == 20]

function-which

返回查询值的位置坐标

1
2
which(a>90)
a[which(a>90)]

logic condition & | !

逻辑判断:与或非

1
2
3
4
b = rep(c('M','F'),6)
a >80 & b =='M'
a[a >80 & b =='M']
which(a >80 & b =='M')

clean environment

删除,虽然我从来没用过。。。

1
rm(list = ls())

square root

开平方根以及一些常见的运算

1
2
3
4
5
sqrt(5)
sqrt(a)
log(a)
tan(a)
a^2

1.1检查向量的长度

1
2
3
4
5
6
7
8
length(a)
a + b
a + 1

k = a + c(1,2,3)
k - a

a/b # 向量也可以直接做除法

插一句
脚本语言尽量使用向量化的书写,循环的速度比较慢
考虑兼容性,并且考虑运算速度,还是使用向量更快

1.2向量的数据类型function-class

1
2
3
4
class(a)
typeof(a)
d = 'wyh'
class(d)

1.3操作矩阵

1
2
3
4
5
6
7
8
mat = matrix(c(22,31,17,38,16,7),3,2)
dim(mat) # 维度
class(mat)
nrow(mat) # 几行
ncol(mat) # 几列
mat[,1:2]
mat[1:2]
mat[1:2,]

在矩阵的前两行中挑出小于35的数

1
2
3
4
5
6
7
8
9
mat[1:2,]<35
mat[mat[1:2,]<35]

mat[which(mat[1:2,]<35)] # 经典错误,没有搞清坐标对应的对象

which(mat[1:2,]<35)
mat[which(mat<35)]

mat[1,mat[1,]<35]

下面三段代码效果一样

1
2
3
4
5
mat[1:2,][mat[1:2,]<35]
mat[1:2,][which(mat[1:2,]<35)]
mat[which(mat[1:2,]<35,arr.ind = TRUE)]
# arr.ind 这个参数是改变了返回位置值的方式
which(mat[1:2,]<35,arr.ind = TRUE) # 返回了行列的坐标

合并矩阵为向量

1
c(mat[,1],mat[,2])

下面就开始瞎写了

行列互换

1
t(mat) # transpose: swaps the rows and columns of mat

方阵

1
ze = matrix(0,4,4)

对角阵

1
diag(5)

function-seq

序列函数

1
2
seq(0,0,length = 5) # 序列函数
matrix(c(1,seq(0,0,length = 5)),5,5)

1
2
3
4
5
n = 5
# Build an n x n identity matrix: start from all zeros and write 1 at every
# (n + 1)-th linear index, which are the diagonal cells in column-major order.
mat1 = matrix(0, n, n)
mat1[seq(1, n * n, n + 1)] = 1
mat1
# Same matrix from the repeating pattern "1 followed by n zeros". length.out
# trims the pattern to exactly n * n values so matrix() receives no surplus
# data (the untrimmed version supplies n * (n + 1) values).
matrix(rep(c(1, rep(0, n)), length.out = n * n), n, n)

矩阵乘法

1
2
3
4
5
6
rnorm(35,5)
mat2 = matrix(rnorm(35,5),5,7)
mat1%*%mat2 # 矩阵乘法
a = matrix(1:24,6)
b= t(a)
p = a%*%b

矩阵求逆,秩

1
2
3
4
5
6
7
8
# Rank of p from its QR decomposition; qr() needs the matrix as its argument.
qr(p)$rank
# solve(p) returns the inverse and requires a full-rank square matrix. try()
# reports the error but keeps the script running when p is singular.
try(solve(p))
# A small full-rank example.
p = matrix(c(5, 1, 2, 3), 2)
p
pinv = solve(p)
# Both products are the identity matrix, up to double-precision rounding
# (roughly 16 significant digits).
pinv %*% p
p %*% pinv

求行列式

1
2
det(p) #determinant
?det

求特征值

1
2
3
4
ee = eigen(p) #eigenvalue
ee$values
prod(ee$values)
# 连乘product,正好等于行列式

第2节

确定双精度范围内是否相等

1
all.equal(a,b)

按行填需要调整参数byrow=TRUE

1
matrix(1:25,5,5,byrow = TRUE)

三维数组array

1
2
ary = array(1:24,c(2,3,4))  # c(2,3,4)是维度
ary[,1,2] # 最后一位是维度

数据框data.frame,存储不同类型数据

1
2
3
4
5
6
7
8
9
10
11
12
a = matrix(c(1,2,3,4),2,2)
a[4] = 'feng'
a

name = c('yang','crow','ruby','weiss')
age = c(19,28,17,14)
sal = c(2000,1800,5000,2000)
data = data.frame(name,age,sal)
data

sex = c('f','m','f','f')
cbind(data,sex)

转换数据类型,类型不合的克星

1
as.numeric('2')

树状图数据如何存储->可嵌套的列表

1
2
3
4
5
6
7
8
9
10
11
info = list() #先定义一个空列表是精髓hhh
info$wang5 = data.frame(age = 19, work = 'yes')
info$zhang3 = matrix(c(1,2,3,4),2,2)
class(info)
class('wang5')
info$li4 = list(age = 28, edu = 'cufe', sal = '180/min')
info$li4
info$li4$age

length(info)
names(info)

给列表追加新元素

1
2
3
info$zhao6 = 'Neimenggu'  #直接加就行,不用初始化
info[[1]][1,2] = 999
info$zhang3[1,2] = 999

第3节 apply族函数

lapply,rapply 函数初体验

给列表的每个元素取对数(其实什么操作都可以有)

1
2
3
4
5
lapply(info,log) # 划重点
info[['zhang3']] = list(b1 = 10,b2 = 7)

rapply(info,log) #可递归的(r)
rapply(info,log,how = 'unlist') #参数how的值 默认unlist, 还有replace

把函数改成x平方

1
rapply(info, function(x) x^2,how = 'replace')

deal with matrix: calculate the mean of col/row

1
2
3
mat = matrix(1:24,4,6)
rowMeans(mat)# only for mat
colMeans(mat)

或者你还可以使用apply函数

1
2
3
apply(mat, 2, mean) # 2 means dim2 第二维
apply(mat, 2, median)
apply(mat, 2, var)

change to array

背景是:4 groups, 6 members, and 5 terms

1
ary = array(1:120,c(4,6,5))

calculate the mean score of every term

1
2
3
4
5
apply(ary, 3, mean)
apply(ary, c(1,3), mean) # every group's mean in each term 4*5
apply(ary, c(1,3), max)
# Spread of a vector: its largest value minus its smallest value.
maymaxmin = function(x) {
  diff(range(x))
}
apply(ary, c(1,3), maymaxmin)

apply一般只作用于二维

1
2
3
4
5
6
7
ary2 = array(rnorm(120),c(4,5,6))
apply(ary2, 3, mean)
apply(ary2, 1, colMeans)
apply(ary2, 2, median)
apply(ary2, 1, sort)
apply(ary2, 1, sum)
?apply

if it isn’t a matrix, you can use ‘as.matrix’
See also (from ?apply): lapply (for lists) and, there, simplify2array; tapply;
and the convenience functions sweep and aggregate.

接着看lapply

用于列表

1
2
3
4
5
6
7
lst = list()
lst$a = 1:10
lst$b = 4:6
lst$c = 9:24
lapply(lst, sum)
lapply(lst, mean)
lapply(lst, length)

var is not suitable for a matrix here
(too many dims)
use as.numeric to flatten it first

1
2
3
4
5
# Squared deviation of every element of x from the mean of x.
myfun = function(x) {
  centre = mean(x)
  (x - centre)^2
}
apply(ary, 1, myfun)
lst$d = list()
lst$d$d1 = c(2,3,4,6)
lst$d$d2 = c(3,8,9,1,23)

递归rapply

1
2
3
rapply(lst, sum)
rapply(lst, sum, how = 'replace')
lst2 = rep(60,10)

a list can’t do plus-minus calculation, but unlist can

1
unlist(lst2) - unlist(lst$a)

多元mapply

1
2
3
4
mapply(function(x,y) x-y, lst2, lst$a) # multiple lapply
mapply('-', lst2, lst$a)
# '+' ,'-' ,'*' ,'/' , all those symbols are functions
mapply(sum, lst2, lst$a)

if list2 has exactly the same structure as list3

两列表间计算

1
2
3
4
5
6
7
8
list2 = list()
list2$a = 3
list2$b = c(4,4)
list3 = list()
list3$c = 5
list3$d = c(6,7)
mapply('+', list2, list3)
mapply(function(x,y) x+y, list2,list3)

第4节

change of data type

准备工作

1
2
3
4
5
6
7
8
9
10
11
12
a = matrix(1:24,4,6)
t(a)
matrix(a,2,12)
matrix(a,2,)
matrix(t(a),,2)

b = list()
b$b1 = c(1:4)
b$b2 = c(5:8)
b$b3 = c(9:12)
matrix(unlist(b),3,4)
do.call(cbind,b) # like apply function

compare

1
2
3
a = 5
b = 6
a == b

注意:‘= =’(中间带空格)是错误的写法,必须连写为 ‘==’

1
2
3
4
5
a != b
a > b
a < b
a >= b
a <= b

two to more

1
2
3
4
5
6
A = c(1,3,5)
B = c(3,2,5)
A == B
A > B
A > 2
A > c(1,2,3,4) # auto repeat 1/4 time, but warning

R中出现warning一定不能放过

change uppercase and lowercase

1
2
3
'feng' == 'Feng'
tolower('Feng') == 'feng'
toupper('feng')

更厉害的两数

1
all.equal(1,exp(3)/exp(3)) # 忽略浮点数的判断

问:有没有人不及格?

1
2
3
4
5
rec = c(1,99,61,74)
any(rec < 60) # 至少一条满足 if any
all(rec > 60) # 全部满足 if all
'zhang3' %in% c('zhang3','li4','wang5') # if in
tolower('zhang3') %in% tolower(c('ZHANG3','LI4','WANG5'))

condition test

a = 10
if (a < 10){
print(a)
}else{
print(a+100)
}

# No `else` follows this `if`, so the second braced block is a separate
# statement and runs regardless of the condition.
if (2018 %% 4 == 0) {
  print('yes')
}
{
  print('no')
}

# Here `else` sits on the same line as the closing brace of the `if` branch,
# so exactly one of the two branches runs.
if (2018 %% 4 == 0) {
  print('yes')
} else {
  print('no')
}

建立函数

take care of the position of ‘{}’ and ‘else’
要注意 输入是否合法,容错纠正机制;逻辑一定要清楚,最后输出可以使用list和dataframe
问题:如何判断闰年

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
year = 1900:2018
# if () needs a single TRUE/FALSE, so a whole vector of years is filtered with
# a logical mask built from the element-wise operators `|` and `&` instead.
# Leap year: divisible by 400, or divisible by 4 but not by 100.
leap = year %% 400 == 0 | (year %% 4 == 0 & year %% 100 != 0)
print(year[leap])

# Return the leap years contained in `year`.
#
# year: numeric vector of calendar years.
# Returns the elements of `year` that are divisible by 400, or divisible by 4
# but not by 100, in their original order.
# Stops with an error when `year` is not numeric.
isleapyear = function(year) {
  if (!is.numeric(year)) {
    stop('you must specify a numerical input.')
  }

  # Element-wise `|` / `&` give one flag per year. The scalar forms `||` /
  # `&&` only accept length-one operands and cannot build a mask.
  index = year %% 400 == 0 | (year %% 4 == 0 & year %% 100 != 0)

  year[index]
}

yr = c(1900:2018)
isleapyear(yr)

练习

apply family functions

1
?apply

apply

deal with matrix & same type

1
2
3
4
mat = matrix(1:24,4,6)
ary = array(1:120,c(4,6,5))
apply(mat, 2, mean) # means of second dim
apply(ary, 3, mean)

lapply

list

1
2
3
4
5
6
7
lst = list()
lst$a = 1:10
lst$b = 4:6
lst$c = 9:24
lapply(lst, sum) # deal with list
lapply(lst, length)
sapply(lst, sum) # output is vector

rapply

1
2
3
rapply(lst, sum)
rapply(lst, length)
rapply(lst, sum, how = 'replace')

tapply

irregular type 分类统计

1
2
3
4
?tapply
fac <- factor(rep_len(1:3, 17), levels = 1:5)
table(fac)
tapply(1:17, fac, sum)

tapply(x,f,g) :x为向量,f为因子列,g为操作函数

1
2
3
4
a = c(1:10)
b = c(5:14)
data = data.frame(a,b)
tapply(data$a, data$b, sum)

mapply

对多个列表或者向量参数使用函数

1
2
3
4
lst2 = rep(60,10) # list can't do plus-minus-caculation
unlist(lst2) - unlist(lst$a) # but unlist can
mapply(function(x,y) x-y, lst2, lst$a)
mapply(sum, lst2, lst$a)

2.my function

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
a = rnorm(100)
summary(a)

# One-row summary of a numeric vector.
#
# lst: numeric vector to summarise.
# Returns a data frame with the columns min, median, mean, max and var.
# Stops with an error when `lst` is not numeric.
mysummary = function(lst) {
  if (!is.numeric(lst)) {
    stop('you must specify a numerical input.')
  }

  data.frame(
    min = min(lst),
    median = median(lst),
    mean = mean(lst),
    max = max(lst),
    var = var(lst)
  )
}
mysummary(a)

3.解方程

法1

1
2
3
4
5
# Quadratic a*x^2 + b*x + c evaluated at x.
f <- function(x, a, b, c) {
  a * x^2 + b * x + c
}
# Coefficients of x^2 + 5x + 6 = 0.
a <- 1
b <- 5
c <- 6
# Discriminant, then both roots from the quadratic formula.
delta = b^2 - 4 * a * c
solve1 = (-b + delta^(1/2)) / (2 * a)
solve2 = (-b - delta^(1/2)) / (2 * a)

写出关键的地方

1
2
3
4
5
6
7
8
9
10
11
12
13
# Classify the roots of a*x^2 + b*x + c = 0 by the sign of the discriminant.
#
# a, b, c: the three coefficients, one number each.
# Returns a message string when the discriminant is zero or negative, and a
# one-row data frame holding a label plus both roots when it is positive.
test = function(a, b, c) {
  delta = b^2 - 4 * a * c
  if (delta == 0) {
    return('只有一个根')
  }
  if (delta < 0) {
    return('有两个虚数解')
  }
  # Positive discriminant: two distinct real roots.
  spread = delta^(1/2)
  solve1 = (-b + spread) / (2 * a)
  solve2 = (-b - spread) / (2 * a)
  data.frame('有两个实根', solve1, solve2)
}
test(a,b,c)

法2

求一元二次方程ax^2+bx+c=0,设a=1,b=5,c=6,求x?

1
2
3
f3 <- function(x,a,b,c) a*x^2+b*x+c
a <- 1; b <- 5; c <- 6
result1 <- uniroot(f3,c(0,-2),a=a,b=b,c=c,tol=0.0001)

uniroot 求解单个根

1
2
3
result2 <- uniroot(f3,c(-4,-3),a=a,b=b,c=c,tol=0.0001)
result1$root
result2$root

最后再来看一下简单的函数

函数mysummary

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
a = rnorm(100)
summary(a)

# One-row summary of a numeric vector.
#
# lst: numeric vector to summarise.
# Returns a data frame with the columns min, median, mean, max and var.
# Stops with an error when `lst` is not numeric.
mysummary = function(lst) {
  if (!is.numeric(lst)) {
    stop('you must specify a numerical input.')
  }

  data.frame(
    min = min(lst),
    median = median(lst),
    mean = mean(lst),
    max = max(lst),
    var = var(lst)
  )
}
mysummary(a)

解方程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#先输入三个向量
vec1 = c(1,2,1)
vec2 = c(2,3,2)
vec3 = c(1,2,1)
# Solve a batch of quadratic equations a*x^2 + b*x + c = 0 in one call.
#
# vec1, vec2, vec3: numeric vectors of equal length holding the coefficients
#   a, b and c; position i of each vector describes equation i.
# Returns a data frame with one row per equation and three columns: the
#   equation index, a description of the root situation, and the root(s) as
#   text. The column names out.num / out.condition / out.value are the ones
#   the loop-based version of this function produced; they are kept so code
#   that reads the result by name keeps working.
# Returns a message string instead when the inputs are not numeric or differ
#   in length.
test = function(vec1, vec2, vec3) {
  # Validate the whole vectors once, before any arithmetic.
  if (!is.numeric(vec1) || !is.numeric(vec2) || !is.numeric(vec3)) {
    return('请输入数值型向量')
  }
  if (length(vec1) != length(vec2) || length(vec1) != length(vec3)) {
    return('请输入相同长度的向量')
  }

  delta = vec2^2 - 4 * vec1 * vec3
  # pmax() keeps the square root defined for negative discriminants; those
  # rows are overwritten further down.
  spread = sqrt(pmax(delta, 0))
  solve1 = (-vec2 + spread) / (2 * vec1)
  solve2 = (-vec2 - spread) / (2 * vec1)

  # Default case: two distinct real roots, reported as "x1, x2".
  condition = rep('有两个根', length(delta))
  value = paste(solve1, solve2, sep = ', ')

  # Zero discriminant: the single root is -b / (2a), which is what solve1
  # reduces to when spread is 0.
  one_root = which(delta == 0)
  condition[one_root] = '只有一个根'
  value[one_root] = as.character(solve1[one_root])

  # Negative discriminant: no real root.
  no_real = which(delta < 0)
  condition[no_real] = '有两个虚数解'
  value[no_real] = '无实数解'

  # NOTE(review): a == 0 (not a quadratic) is not treated specially.
  data.frame(
    out.num = seq_along(delta),
    out.condition = condition,
    out.value = value
  )
}

# Try it out. The result goes into its own variable so the function `test`
# is not overwritten by its return value and can be called again.
result = test(vec1, vec2, vec3)

感谢大家,欢迎在评论区交流讨论!


本文链接: https://konelane.github.io/2018/03/26/R语言-统计计算课程笔记/

-- EOF --

¥^¥请氦核牛饮一盒奶~suki