2018-03-26

统计计算课程笔记（一）

2018 SC学习笔记（雾）

第1节

a = seq(1,100,length.out = 12)
a
a<20       # element-wise comparison, gives a logical vector
a[a>20]    # logical subsetting: keep the elements greater than 20
a[a==20]   # equality test needs `==`; `a[a=20]` is read as a[20], which is NA here

function-which

返回查询值的位置坐标

which(a>90)
a[which(a>90)]

logic condition & | !

逻辑判断:与或非

b = rep(c('M','F'),6)
a >80 & b =='M'
a[a >80 & b =='M']
which(a >80 & b =='M')

clean environment

删除，虽然我从来没用过。。。

rm(list = ls())

square root

开平方根以及一些常见的运算

sqrt(5)
sqrt(a)
log(a)
tan(a)
a^2

1.1检查向量的长度

length(a)
a + b
a + 1

k = a + c(1,2,3)
k - a

a/b # 向量也可以直接做除法

插一句
脚本语言尽量使用向量化的书写，循环的速度比较慢
考虑兼容性，并且考虑运算速度，还是使用向量更快

1.2向量的数据类型function-class

class(a)
typeof(a)
d = 'wyh'
class(d)

1.3操作矩阵

mat = matrix(c(22,31,17,38,16,7),3,2)
dim(mat)  # 维度
class(mat)
nrow(mat) # 几行
ncol(mat) # 几列
mat[,1:2]
mat[1:2]
mat[1:2,]

在矩阵的前两行中挑出小于35的数

mat[1:2,]<35
mat[mat[1:2,]<35]

mat[which(mat[1:2,]<35)] # 经典错误，没有搞清坐标对应的对象

which(mat[1:2,]<35)
mat[which(mat<35)]

mat[1,mat[1,]<35]

下面三段代码效果一样

mat[1:2,][mat[1:2,]<35]
mat[1:2,][which(mat[1:2,]<35)]
mat[which(mat[1:2,]<35,arr.ind = TRUE)]
# arr.ind 这个参数是改变了返回位置值的方式
which(mat[1:2,]<35,arr.ind = TRUE) # 返回了行列的坐标

合并矩阵为向量

c(mat[,1],mat[,2])

下面就开始瞎写了

行列互换

t(mat) # transpose

方阵

ze = matrix(0,4,4)

对角阵

diag(5)

function-seq

序列函数

seq(0,0,length = 5) # 序列函数
matrix(c(1,seq(0,0,length = 5)),5,5)

n = 5
mat1 = matrix(0,n,n)
mat1[seq(1,n*n,n+1)] = 1
mat1
matrix(rep(c(1,rep(0,n)),n),n,n)

矩阵乘法

rnorm(35,5)
mat2 = matrix(rnorm(35,5),5,7)
mat1%*%mat2 # 矩阵乘法
a = matrix(1:24,6)
b= t(a)
p = a%*%b

矩阵求逆，秩

# Invert p; this only works for a square matrix of full rank.
# NOTE(review): the 6x6 p = a %*% t(a) built just before looks rank-deficient,
# so this call is expected to stop with a singular-matrix error - confirm.
solve(p)
# Rank of p from its QR decomposition; qr() needs the matrix as its argument.
qr(p)$rank
p = matrix(c(5,1,2,3),2)
p
pinv = solve(p)
pinv%*%p
p%*%pinv
# 双精度默认16位之后均是零

求行列式

det(p) #determinant
?det

求特征值

ee = eigen(p) #eigenvalue
ee$values
prod(ee$values)
# 连乘product,正好等于行列式

第2节

确定双精度范围内是否相等

all.equal(a,b)

按行填需要调整参数byrow=TRUE

matrix(1:25,5,5,byrow = TRUE)

三维数组array

ary = array(1:24,c(2,3,4)) # c(2,3,4)是维度
ary[,1,2] # 最后一位是维度

数据框data.frame，存储不同类型数据

a = matrix(c(1,2,3,4),2,2)
a[4] = 'feng'
a

name = c('yang','crow','ruby','weiss')
age = c(19,28,17,14)
sal = c(2000,1800,5000,2000)
data = data.frame(name,age,sal)
data

sex = c('f','m','f','f')
cbind(data,sex)

转换数据类型，类型不合的克星

as.numeric('2')

树状图数据如何存储->可嵌套的列表

info = list() #先定义一个空列表是精髓hhh
info$wang5 = data.frame(age = 19, work = 'yes')
info$zhang3 = matrix(c(1,2,3,4),2,2)
class(info)
class('wang5')
info$li4 = list(age = 28, edu = 'cufe', sal = '180/min')
info$li4
info$li4$age

length(info)
names(info)

给列表追加新元素

1
2
3

info$zhao6 = 'Neimenggu'  #直接加就行，不用初始化
info[[1]][1,2] = 999
info$zhang3[1,2] = 999

第3节 apply族函数

lapply，rapply 函数初体验

给列表的每个元素取对数（其实什么操作都可以有）

lapply(info,log) # 划重点
info[['zhang3']] = list(b1 = 10,b2 = 7)

rapply(info,log) #可递归的（r）
rapply(info,log,how = 'unlist') #参数how的值 默认unlist, 还有replace

把函数改成x平方

rapply(info, function(x) x^2,how = 'replace')

deal with matrix: calculate the mean of col/row

1
2
3

mat = matrix(1:24,4,6)
rowMeans(mat)# only for mat
colMeans(mat)

或者你还可以使用apply函数

1
2
3

apply(mat, 2, mean) # 2 means dim2 第二维
apply(mat, 2, median)
apply(mat, 2, var)

change to array

背景是：4 groups, 6 members, and 5 terms

ary = array(1:120,c(4,6,5))

calculate the mean score of every term

apply(ary, 3, mean)
apply(ary, c(1,3), mean) # every group's mean in each term 4*5
apply(ary, c(1,3), max)
# Spread of x: its largest value minus its smallest value.
maymaxmin = function(x) {
  largest = max(x)
  smallest = min(x)
  largest - smallest
}
apply(ary, c(1,3), maymaxmin)

apply一般只作用于二维

ary2 = array(rnorm(120),c(4,5,6))
apply(ary2, 3, mean)
apply(ary2, 1, colMeans)
apply(ary2, 2, median)
apply(ary2, 1, sort)
apply(ary2, 1, sum)
?apply

if it isn't a matrix, you can convert it with 'as.matrix'.
See also (from ?apply): lapply for lists, simplify2array, tapply,
and the convenience functions sweep and aggregate.

接着看lapply

用于列表

lst = list()
lst$a = 1:10
lst$b = 4:6
lst$c = 9:24
lapply(lst, sum)
lapply(lst, mean)
lapply(lst, length)

var is not suitable for a multi-dimensional array
(too many dims)
use as.numeric first

# Squared deviation of every element of x from the mean of x.
myfun = function(x) {
  centred = x - mean(x)
  centred^2
}
apply(ary, 1, myfun)
lst$d = list()
lst$d$d1 = c(2,3,4,6)
lst$d$d2 = c(3,8,9,1,23)

递归rapply

1
2
3

rapply(lst, sum)
rapply(lst, sum, how = 'replace')
lst2 = rep(60,10)

a list can't do plus/minus calculation, but unlist can

unlist(lst2) - unlist(lst$a)

多元mapply

mapply(function(x,y) x-y, lst2, lst$a) # multiple lapply
mapply('-', lst2, lst$a)
# '+' ,'-' ,'*' ,'/' , all those symbols are functions
mapply(sum, lst2, lst$a)

if list2 has exactly the same structure as list3

两列表间计算

list2 = list()
list2$a = 3
list2$b = c(4,4)
list3 = list()
list3$c = 5
list3$d = c(6,7)
mapply('+', list2, list3)
mapply(function(x,y) x+y, list2,list3)

第4节

change of data type

准备工作

a = matrix(1:24,4,6)
t(a)
matrix(a,2,12)
matrix(a,2,)
matrix(t(a),,2)

b = list()
b$b1 = c(1:4)
b$b2 = c(5:8)
b$b3 = c(9:12)
matrix(unlist(b),3,4)
do.call(cbind,b) # like apply function

compare

1
2
3

a = 5
b = 6
a == b

注意：写成 '= ='（中间带空格）是不对的

a != b
a > b
a < b
a >= b
a <= b

two to more

A = c(1,3,5)
B = c(3,2,5)
A == B
A > B
A > 2
A > c(1,2,3,4) # auto repeat 1/4 time, but warning

R中出现warning一定不能放过

change uppercase and lowercase

1
2
3

'feng' == 'Feng'
tolower('Feng') == 'feng'
toupper('feng')

更厉害的两数

all.equal(1,exp(3)/exp(3)) # 忽略浮点数的判断

问：有没有人不及格?

rec = c(1,99,61,74)
any(rec < 60) # 至少一条满足 if any
all(rec > 60) # 全部满足 if all
'zhang3' %in% c('zhang3','li4','wang5') # if in
tolower('zhang3') %in% tolower(c('ZHANG3','LI4','WANG5'))

condition test

a = 10
if (a < 10){
print(a)
}else{
print(a+100)
}

# Without `else`, the second braced block is an independent statement,
# so 'no' is printed whatever the condition is.
if (2018%%4 == 0){
print('yes')}
{print('no')}

# `else` has to follow the closing `}` on the same line.
if (2018%%4 == 0){ print('yes')
}else{
print('no')}

建立函数

take care of the position of ‘{}’ and ‘else’
要注意 输入是否合法，容错纠正机制；逻辑一定要清楚，最后输出可以使用list和dataframe
问题：如何判断闰年

year = c(1900:2018)
# `if` takes a single TRUE/FALSE, so it cannot test a whole vector of years.
# Build an element-wise logical mask with | and & and subset with it instead.
# Leap year: divisible by 400, or divisible by 4 but not by 100.
leap = year%%400 == 0 | (year%%4 == 0 & year%%100 != 0)
print(year[leap])

# Pick the leap years out of a numeric vector.
#
# year: numeric vector of years.
# Returns the elements of `year` that are leap years (divisible by 400,
# or divisible by 4 but not by 100), in their original order.
# Stops with an error when `year` is not numeric.
isleapyear = function(year){
  if(!is.numeric(year)) # input validation
  {
    stop('you must specify a numerical input.')
  }

  # Element-wise | and & build one TRUE/FALSE per year; the scalar forms
  # || and && only accept length-one operands.
  index = year%%400 == 0 | (year%%4 == 0 & year%%100 != 0)

  out = year[index]

  return(out) # a list or data.frame would work as well
}

yr = c(1900:2018)
isleapyear(yr)

练习

applyfamily function

?apply

apply

deal with matrix & same type

mat = matrix(1:24,4,6)
ary = array(1:120,c(4,6,5))
apply(mat, 2, mean)  # means of second dim
apply(ary, 3, mean)

lapply

list

lst = list()
lst$a = 1:10
lst$b = 4:6
lst$c = 9:24
lapply(lst, sum) # deal with list
lapply(lst, length)
sapply(lst, sum) # output is vector

rapply

1
2
3

rapply(lst, sum)
rapply(lst, length)
rapply(lst, sum, how = 'replace')

tapply

irregular type 分类统计

?tapply
fac <- factor(rep_len(1:3, 17), levels = 1:5)
table(fac)
tapply(1:17, fac, sum)

tapply(x,f,g) :x为向量,f为因子列,g为操作函数

a = c(1:10)
b = c(5:14)
data = data.frame(a,b)
tapply(data$a, data$b, sum)

mapply

对多个列表或者向量参数使用函数

lst2 = rep(60,10) # list can't do plus-minus-caculation
unlist(lst2) - unlist(lst$a) # but unlist can
mapply(function(x,y) x-y, lst2, lst$a)
mapply(sum, lst2, lst$a)

2.my function

a = rnorm(100)
summary(a)

# Summary statistics of a numeric input.
#
# lst: numeric data to summarise; anything non-numeric stops with an error.
# Returns a data.frame whose columns are named min, median, mean, max, var.
mysummary <- function(lst) {
  if (!is.numeric(lst)) stop('you must specify a numerical input.')

  # The local names below are what data.frame() uses as column names.
  var <- var(lst)
  max <- max(lst)
  mean <- mean(lst)
  median <- median(lst)
  min <- min(lst)

  data.frame(min, median, mean, max, var)
}
mysummary(a)

3.解方程

法1

# Value of the quadratic a*x^2 + b*x + c at x.
f <- function(x, a, b, c) {
  a * x^2 + b * x + c
}
a <- 1
b <- 5
c <- 6
# Discriminant first, then both roots from the quadratic formula.
delta <- b^2 - 4 * a * c
solve1 <- (-b + delta^(1/2)) / (2 * a)
solve2 <- (-b - delta^(1/2)) / (2 * a)

写出关键的地方

# Classify the roots of a*x^2 + b*x + c = 0 by the sign of the discriminant.
#
# Returns a message string when the discriminant is zero or negative, and
# otherwise a data.frame holding a label plus the two roots solve1 and solve2.
test = function(a,b,c){
  delta = b^2 - 4*a*c

  # guard clauses for the two cases that only report a message
  if (delta == 0) return('只有一个根')
  if (delta < 0) return('有两个虚数解')

  solve1 = (-b + delta^(1/2))/(2*a)
  solve2 = (-b - delta^(1/2))/(2*a)
  data.frame('有两个实根',solve1,solve2)
}
test(a,b,c)

法2

求一元二次方程ax^2+bx+c=0,设a=1,b=5,c=6,求x?

1
2
3

# Quadratic a*x^2 + b*x + c, written so uniroot() can search over x.
f3 <- function(x,a,b,c) a*x^2+b*x+c
a <- 1; b <- 5; c <- 6
# uniroot() needs an interval with lower < upper whose end points give values
# of opposite sign (or zero); f3(-2.5) = -0.25 and f3(0) = 6 bracket the root -2.
result1 <- uniroot(f3,c(-2.5,0),a=a,b=b,c=c,tol=0.0001)

用 uniroot 求解单个根

1
2
3

# Search the interval [-4, -3] for another root of f3, then show both roots.
result2 <- uniroot(f3, interval = c(-4, -3), a = a, b = b, c = c, tol = 0.0001)
result1$root
result2$root

最后再来看一下简单的函数

函数mysummary

a = rnorm(100)
summary(a)

# Summary statistics of a numeric input.
#
# lst: numeric data to summarise; anything non-numeric stops with an error.
# Returns a data.frame whose columns are named min, median, mean, max, var.
mysummary <- function(lst) {
  if (!is.numeric(lst)) stop('you must specify a numerical input.')

  # The local names below are what data.frame() uses as column names.
  var <- var(lst)
  max <- max(lst)
  mean <- mean(lst)
  median <- median(lst)
  min <- min(lst)

  data.frame(min, median, mean, max, var)
}
mysummary(a)

解方程

#先输入三个向量
vec1 = c(1,2,1)
vec2 = c(2,3,2)
vec3 = c(1,2,1)
# Solve a*x^2 + b*x + c = 0 for every coefficient triple
# (vec1[i], vec2[i], vec3[i]).
#
# vec1, vec2, vec3: numeric vectors of equal length holding a, b and c.
#   The formulas divide by 2*a, so a == 0 is not treated specially.
# Returns a data.frame with one row per equation and the columns
#   out.num       - index of the equation
#   out.condition - '只有一个根', '有两个虚数解' or '有两个根'
#   out.value     - the root(s) as text, or '无实数解' without a real root
# Non-numeric input or vectors of different length return a message string
# instead of a data.frame.
test = function(vec1,vec2,vec3){
  # Validate the whole vectors once, before any arithmetic.
  if (!is.numeric(vec1) || !is.numeric(vec2) || !is.numeric(vec3)) {
    return('请输入数值型向量')
  }
  if (length(vec1) != length(vec2) || length(vec1) != length(vec3)) {
    return('请输入相同长度的向量')
  }

  # Vectorised discriminant; which() drops NA so missing coefficients
  # simply leave NA in that row.
  delta = vec2^2 - 4*vec1*vec3
  one  = which(delta == 0)
  none = which(delta < 0)
  two  = which(delta > 0)

  condition = rep(NA_character_, length(delta))
  value = rep(NA_character_, length(delta))

  condition[one] = '只有一个根'
  value[one] = as.character(-vec2[one]/(2*vec1[one])) # repeated root is -b/(2a)

  condition[none] = '有两个虚数解'
  value[none] = '无实数解'

  root = sqrt(delta[two]) # only positive discriminants reach sqrt()
  condition[two] = '有两个根'
  value[two] = paste((-vec2[two] + root)/(2*vec1[two]),
                     (-vec2[two] - root)/(2*vec1[two]), sep = ', ')

  data.frame(out.num = seq_along(delta), out.condition = condition,
             out.value = value, stringsAsFactors = FALSE)
}

# Try it out; store the result under its own name so that the function
# `test` is not overwritten by its return value.
result = test(vec1,vec2,vec3)
result

感谢大家，欢迎在评论区交流讨论！

本文链接： https://konelane.github.io/2018/03/26/R语言-统计计算课程笔记/

-- EOF --

转载请注明出处署名-非商业性使用-禁止演绎 3.0 国际（CC BY-NC-ND 3.0）

￥^￥请氦核牛饮一盒奶~suki

打赏