目录
2018 SC学习笔记(雾)
第1节 1 2 3 4 5 a = seq(1 ,100 ,length.out = 12 )a a<20 a [a>20 ]a [a=20 ]
function-which 返回查询值的位置坐标1 2 which (a>90 ) a [which(a>90 )]
logic condition & | ! 逻辑判断:与或非1 2 3 4 b = rep(c('M' ,'F' ),6 )a >80 & b =='M' a [a >80 & b =='M' ]which (a >80 & b =='M' )
clean environment 删除,虽然我从来没用过。。。
square root 开平方根以及一些常见的运算1 2 3 4 5 sqrt (5 ) sqrt (a) log (a) tan (a) a^2
1.1检查向量的长度 1 2 3 4 5 6 7 8 length (a )a + ba + 1 k = a + c(1 ,2 ,3 ) k - a a /b
插一句:脚本语言应尽量使用向量化的写法,循环的速度比较慢;兼顾兼容性和运算速度,还是向量化运算更快。
1.2向量的数据类型function-class 1 2 3 4 class (a) typeof (a) d = 'wyh' class (d)
1.3操作矩阵 1 2 3 4 5 6 7 8 mat = matrix (c(22,31,17,38,16,7),3,2)dim(mat ) # 维度 class (mat )nrow(mat ) # 几行 ncol(mat ) # 几列 mat [,1:2]mat [1:2]mat [1:2,]
在矩阵的前两行中挑出小于35的数 1 2 3 4 5 6 7 8 9 mat [1:2,]<35mat [mat [1:2,]<35]mat [which (mat [1:2,]<35)] # 经典错误,没有搞清坐标对应的对象which (mat [1:2,]<35)mat [which (mat <35)]mat [1,mat [1,]<35]
下面三段代码效果一样 1 2 3 4 5 mat[1:2, ][mat[1:2, ]<35] mat[1:2,][which(mat[1:2,]<35)] mat[which(mat[1:2,]<35,arr.ind = TRUE)] # arr.ind 这个参数是改变了返回位置值的方式 which(mat[1:2,]<35,arr.ind = TRUE) # 返回了行列的坐标
合并矩阵为向量
下面就开始瞎写了 行列互换
方阵
对角阵
function-seq 序列函数1 2 seq(0,0,length = 5) matrix(c(1 ,seq(0,0,length = 5) ),5 ,5 )
1 2 3 4 5 n = 5 mat1 = matrix(0 ,n ,n ) mat1[seq(1 ,n *n ,n +1 )] = 1 mat1 matrix(rep(c(1 ,rep(0 ,n )),n ),n ,n )
矩阵乘法 1 2 3 4 5 6 rnorm(35,5) mat2 = matrix(rnorm(35,5),5,7) mat1%*%mat2 a = matrix(1:24,6) b= t(a) p = a%*%b
矩阵求逆,秩 1 2 3 4 5 6 7 8 solve(p) qr() $rankp = matrix(c(5 ,1 ,2 ,3 ),2 ) p pinv = solve(p) pinv%*%p p%*%pinv
求行列式 1 2 det (p) #determinant ?det
求特征值 1 2 3 4 ee = eigen(p) #eigenvalue ee$ values prod(ee$ values) # 连乘product,正好等于行列式
第2节 确定双精度范围内是否相等
按行填需要调整参数byrow=TRUE 1 matrix (1 :25 ,5 ,5 ,byrow = TRUE)
三维数组array 1 2 ary = array(1:24,c(2,3,4)) ary[,1,2]
数据框data.frame ,存储不同类型数据 1 2 3 4 5 6 7 8 9 10 11 12 a = matrix(c(1 ,2 ,3 ,4 ),2 ,2 )a [4 ] = 'feng' a name = c('yang' ,'crow' ,'ruby' ,'weiss' ) age = c(19 ,28 ,17 ,14 ) sal = c(2000 ,1800 ,5000 ,2000 ) data = data.frame(name,age,sal) data sex = c('f' ,'m' ,'f' ,'f' ) cbind (data,sex)
转换数据类型,类型不合的克星
树状图数据如何存储->可嵌套的列表 1 2 3 4 5 6 7 8 9 10 11 info = list() #先定义一个空列表是精髓hhh info$wang5 = data.frame(age = 19 , work = 'yes' ) info$zhang3 = matrix(c(1 ,2 ,3 ,4 ),2 ,2 ) class (info) class ('wang5' ) info$li4 = list(age = 28 , edu = 'cufe' , sal = '180/min' ) info$li4 info$li4 $age length (info) names (info)
给列表追加新元素 1 2 3 info$zhao6 = 'Neimenggu' #直接加就行,不用初始化 info[[1 ]][1,2] = 999 info$zhang3[1,2] = 999
第3节 apply族函数 lapply,rapply 函数初体验给列表的每个元素取对数(其实什么操作都可以有) 1 2 3 4 5 lapply(info,log ) # 划重点 info[['zhang3']] = list(b1 = 10 ,b2 = 7 ) rapply(info,log ) #可递归的(r) rapply(info,log ,how = 'unlist' ) #参数how的值 默认unlist, 还有replace
把函数改成x平方 1 rapply(info , function(x ) x^2 ,how = 'replace')
deal with matrix:caculate the mean of col/row 1 2 3 mat = matrix (1:24,4,6)rowMeans(mat )# only for mat colMeans(mat )
或者你还可以使用apply函数 1 2 3 apply(mat , 2, mean ) # 2 means dim2 第二维 apply(mat , 2, median ) apply(mat , 2, var )
change to array 背景是:4 groups, 6 members, and 5 terms1 ary = array(1 :120 ,c(4 ,6 ,5 ))
caculate mean of score every term 1 2 3 4 5 apply(ary , 3 , mean) apply(ary , c(1 ,3 ), mean) # every group's mean in each term 4 *5 apply(ary , c(1 ,3 ), max) maymaxmin = function(x ) max(x )-min(x ) apply(ary , c(1 ,3 ), maymaxmin)
apply一般只作用于二维1 2 3 4 5 6 7 ary2 = array (rnorm(120 ),c(4 ,5 ,6 )) apply (ary2, 3 , mean )apply (ary2, 1 , colMeans)apply (ary2, 2 , median )apply (ary2, 1 , sort )apply (ary2, 1 , sum )?apply
If the object isn't a matrix, you can convert it with `as.matrix`. See also (from `?apply`): `lapply` (for lists) and, there, `simplify2array`; `tapply`; and the convenience functions `sweep` and `aggregate`.
接着看lapply 用于列表1 2 3 4 5 6 7 lst = list() lst$a = 1 :10 lst$b = 4 :6 lst$c = 9 :24 lapply (lst, sum) lapply (lst, mean) lapply (lst, length)
`var` is not suitable for arrays with too many dimensions; convert the data with `as.numeric` first.
1 2 3 4 5 myfun = function(x) (x - mean(x))^2 apply(ary, 1 , myfun) lst$d = list () lst$d$d1 = c(2 ,3 ,4 ,6 ) lst$d$d2 = c(3 ,8 ,9 ,1 ,23 )
递归rapply 1 2 3 rapply (lst, sum) rapply (lst, sum, how = 'replace' ) lst2 = rep(60 ,10 )
list can’t do plus-minus-caculation,but unlist can1 unlist(lst2 ) - unlist(lst $a)
多元mapply 1 2 3 4 mapply(function(x,y) x-y, lst2, lst$a) # multiple lapply mapply('-' , lst2, lst$a) # '+' ,'-' ,'*' ,'/' , all those symbols are functions mapply(sum, lst2, lst$a)
if list2 has a total same distruction as list3 两列表间计算1 2 3 4 5 6 7 8 list2 = list() list2$a = 3 list2$b = c(4 ,4 ) list3 = list() list3$c = 5 list3$d = c(6 ,7 ) mapply ('+' , list2, list3) mapply (function(x,y) x+y, list2,list3)
第4节 change of data type 准备工作1 2 3 4 5 6 7 8 9 10 11 12 a = matrix t matrix matrix matrix,,2 ) b = list b$b1 = c b$b2 = c b$b3 = c matrix,3 ,4 ) do.call # like apply function
compare
注意’= =’is not right1 2 3 4 5 a != ba > ba < ba >= ba <= b
two to more 1 2 3 4 5 6 A = c(1 ,3 ,5 ) B = c(3 ,2 ,5 ) A == B A > B A > 2 A > c(1 ,2 ,3 ,4 ) # auto repeat 1 /4 time, but warning
R中出现warning一定不能放过
change uppercase and lowercase 1 2 3 'feng' == 'Feng' tolower ('Feng' ) == 'feng' toupper ('feng' )
更厉害的两数 1 all .equal (1 ,exp (3 )/exp (3 )) # 忽略浮点数的判断
问:有没有人不及格? 1 2 3 4 5 rec = c (1 ,99 ,61 ,74 )any(rec < 60 ) # 至少一条满足 if any all (rec > 60 ) # 全部满足 if all 'zhang3' %in% c ('zhang3' ,'li4' ,'wang5' ) # if intolower ('zhang3' ) %in% tolower (c ('ZHANG3' ,'LI4' ,'WANG5' ))
condition test a = 10 if (a < 10){ print(a) }else{ print(a+100) }
if (2018%%4 == 0){ print(‘yes’)} {print(‘no’)}
if (2018%%4 == 0){ print(‘yes’) }else{ print(‘no’)}
# Writing functions: mind the placement of `{}` and `else`. Validate the input
# (fail fast on bad arguments), keep the logic explicit, and return structured
# output (a list or data.frame) when there are several results.
# Exercise: how do we decide whether a year is a leap year?
year <- 1900:2018

# `if` needs a single TRUE/FALSE, so it cannot branch on the whole `year`
# vector. Build an element-wise mask with `|` / `&` and subset with it instead.
print(year[which(year %% 400 == 0 | (year %% 4 == 0 & year %% 100 != 0))])

# Return the leap years contained in `year`.
#
# A year is a leap year when it is divisible by 400, or divisible by 4 but
# not by 100.
#
# year: numeric vector of years.
# Returns the elements of `year` that are leap years, in their original
# order; NA entries are dropped.
# Stops with an error when `year` is not numeric.
isleapyear <- function(year) {
  if (!is.numeric(year)) {
    stop('you must specify a numerical input.')
  }
  # Element-wise `|` / `&` are required here: the scalar forms `||` / `&&`
  # only accept length-one operands and cannot produce a per-year mask.
  index <- year %% 400 == 0 | (year %% 4 == 0 & year %% 100 != 0)
  year[which(index)]
}

yr <- 1900:2018
isleapyear(yr)
练习 applyfamily function
apply deal with matrix & same type1 2 3 4 mat = matrix (1 :24 ,4 ,6 ) ary = array (1 :120 ,c(4 ,6 ,5 )) apply (mat, 2 , mean ) # means of second dim apply (ary, 3 , mean )
lapply list1 2 3 4 5 6 7 lst = list () lst$a = 1:10 lst$b = 4:6 lst$c = 9:24 lapply(lst, sum ) # deal with list lapply(lst, length) sapply(lst, sum ) # output is vector
rapply 1 2 3 rapply (lst, sum) rapply (lst, length) rapply (lst, sum, how = 'replace' )
tapply irregular type 分类统计1 2 3 4 ?tapply fac <- factor (rep_len(1:3, 17), levels = 1:5)table (fac )tapply(1:17, fac , sum )
tapply(x,f,g) :x为向量,f为因子列,g为操作函数1 2 3 4 a = c(1 :10 )b = c(5 :14 )data = data.frame(a ,b) tapply (data$a , data$b , sum)
mapply 对多个列表或者向量参数使用函数1 2 3 4 lst2 = rep(60 ,10 ) # list can't do plus-minus-caculation unlist(lst2 ) - unlist(lst $a) # but unlist can mapply(function (x ,y) x-y, lst2, lst$a) mapply(sum , lst2, lst$a)
# 2. Writing my own function: a small replacement for summary().
a <- rnorm(100)
summary(a)

# Summarise a numeric vector.
#
# lst: numeric vector.
# Returns a one-row data.frame with the columns min, median, mean, max, var.
# Stops with an error when `lst` is not numeric (input validation).
mysummary <- function(lst) {
  if (!is.numeric(lst)) {
    stop('you must specify a numerical input.')
  }
  data.frame(
    min = min(lst),
    median = median(lst),
    mean = mean(lst),
    max = max(lst),
    var = var(lst)
  )
}

mysummary(a)
# 3. Solving a quadratic equation -- approach 1: the closed-form formula.
# f evaluates a*x^2 + b*x + c at x.
f <- function(x, a, b, c) {
  a * x^2 + b * x + c
}
a <- 1
b <- 5
c <- 6
delta <- b^2 - 4 * a * c
solve1 <- (-b + delta^(1 / 2)) / (2 * a)
solve2 <- (-b - delta^(1 / 2)) / (2 * a)

# The key part, wrapped into a function.
#
# Classify the roots of a*x^2 + b*x + c = 0 by the sign of the discriminant.
#
# a, b, c: scalar numeric coefficients.
# Returns a message string when the discriminant is zero or negative;
# otherwise a one-row data.frame holding a label and the two real roots
# (columns solve1 and solve2).
test <- function(a, b, c) {
  delta <- b^2 - 4 * a * c
  if (delta == 0) {
    return('只有一个根')
  }
  if (delta < 0) {
    return('有两个虚数解')
  }
  root_term <- delta^(1 / 2)
  solve1 <- (-b + root_term) / (2 * a)
  solve2 <- (-b - root_term) / (2 * a)
  data.frame('有两个实根', solve1, solve2)
}

test(a, b, c)
# Approach 2: solve a*x^2 + b*x + c = 0 numerically, with a = 1, b = 5, c = 6.
# f3 evaluates the quadratic at x.
f3 <- function(x, a, b, c) {
  a * x^2 + b * x + c
}
a <- 1
b <- 5
c <- 6

# uniroot() searches for a single root inside the given interval, and the
# function values at the two end points must have opposite signs (or be
# zero), so each of the two roots gets its own bracket.
# NOTE(review): the bracket for result1 was missing from the notes; c(-2.5, -1)
# is chosen here because f3 changes sign on it (f3(-2.5) < 0 < f3(-1)).
result1 <- uniroot(f3, c(-2.5, -1), a = a, b = b, c = c, tol = 0.0001)
result2 <- uniroot(f3, c(-4, -3), a = a, b = b, c = c, tol = 0.0001)
result1$root
result2$root
# Finally, another look at a simple function: mysummary.
a <- rnorm(100)
summary(a)

# Summarise a numeric vector.
#
# lst: numeric vector.
# Returns a one-row data.frame with the columns min, median, mean, max, var.
# Stops with an error when `lst` is not numeric (input validation).
mysummary <- function(lst) {
  if (!is.numeric(lst)) {
    stop('you must specify a numerical input.')
  }
  data.frame(
    min = min(lst),
    median = median(lst),
    mean = mean(lst),
    max = max(lst),
    var = var(lst)
  )
}

mysummary(a)
# Solving equations, batch version: one quadratic per position of the inputs.
# Input: three coefficient vectors (a, b and c of a*x^2 + b*x + c = 0).
vec1 <- c(1, 2, 1)
vec2 <- c(2, 3, 2)
vec3 <- c(1, 2, 1)

# Solve a*x^2 + b*x + c = 0 for every position i of the coefficient vectors.
#
# vec1, vec2, vec3: numeric vectors of equal length holding a, b and c.
# Returns a data.frame with one row per equation and three columns:
#   out.num       - index of the equation
#   out.condition - description of the root situation
#   out.value     - the root(s) as text: the double root, both real roots
#                   separated by ", ", or a "no real solution" marker
# For invalid input a message string is returned instead of a data.frame
# (kept from the original contract rather than switching to stop()).
# The column names match what the original data.frame() call produced.
# NOTE(review): a == 0 (not a quadratic) is not handled specially.
test <- function(vec1, vec2, vec3) {
  # Validate the whole vectors up front; checking single elements inside a
  # loop can never detect a length mismatch.
  if (!is.numeric(vec1) || !is.numeric(vec2) || !is.numeric(vec3)) {
    return('请输入数值型向量')
  }
  if (length(vec1) != length(vec2) || length(vec1) != length(vec3)) {
    return('请输入相同长度的向量')
  }

  # Roots, computed for all equations at once.
  delta <- vec2^2 - 4 * vec1 * vec3
  # Clamp at zero so negative discriminants do not trigger NaN warnings;
  # those positions are overwritten by the "no real solution" marker below.
  root_term <- sqrt(pmax(delta, 0))
  solve1 <- (-vec2 + root_term) / (2 * vec1)
  solve2 <- (-vec2 - root_term) / (2 * vec1)

  condition <- ifelse(
    delta == 0, '只有一个根',
    ifelse(delta < 0, '有两个虚数解', '有两个根')
  )
  # When delta == 0, solve1 is the double root -b / (2 * a).
  value <- ifelse(
    delta == 0, as.character(solve1),
    ifelse(delta < 0, '无实数解', paste(solve1, solve2, sep = ', '))
  )

  data.frame(
    out.num = seq_along(vec1),
    out.condition = condition,
    out.value = value
  )
}

# Try it out. The result gets its own name so the function `test` is not
# overwritten by its own return value.
test_result <- test(vec1, vec2, vec3)
感谢大家,欢迎在评论区交流讨论!
本文链接:
https://konelane.github.io/2018/03/26/R语言-统计计算课程笔记/
-- EOF --
转载请注明出处 署名-非商业性使用-禁止演绎 3.0 国际(CC BY-NC-ND 3.0)