build matrix (maths work)
mat is beta vector 一切都建立在矩阵运算的基础上
f <- function(m = matrix(),
y = vector(mode = 'numeric'),
mat = vector(mode = 'numeric')){
out <- t(y - m%*%mat)%*%(y - m%*%mat)
return(out)
}
# first-order derivative matrix 一阶导数
## m is independent variable(matrix) dim = 3
## y is dependent variable(vector) dim = 1
d <- function(m = matrix(),
y = vector(mode = 'numeric'),
mat = vector(mode = 'numeric')){
out <- -2*t(m)%*%y+2*t(m)%*%m%*%mat
return(out)
}
# hession
dd <- function(m = matrix()){
out <- 2*t(m)%*%m
return(out)
}
main function to deal with this problem
牛顿迭代的步骤
构建主函数和迭代循环
一定要关注solve函数的使用
mat_esti <- function(m, y){
MaxIter = 30 # loop times
m <- as.matrix(m)
y <- as.matrix(y)
m <- cbind(rep(1,length(m[,1])), m) # add a col of '1'
mat <- seq(1,length(m[1,]))
mat1 <- matrix(mat,1,length(mat))
# Newton
for(i in 1:MaxIter){
# 'd' & 'dd' are same-wide matrixs
# a %*% x = b -> solve(a,b)=x
mat <- mat - solve(dd(m), d(m, y, mat))
mat1 <- rbind(mat1, matrix(mat,1,length(mat)))
if(sum((mat1[i+1,] - mat1[i, ])^2) <1e-6){break}
}
# every beta will be stored in mat1
#
# write colnames, and make a better output
names <- c('Intercept','beta 1','beta 2','beta 3')
dim(mat) <- c(1, length(mat))
best_mat <- as.data.frame(mat)
for(i in 1:(length(mat)-1)){
name <- paste(names[i],i)
}
colnames(best_mat) <- names
out <- data.frame(best_mat)
return(out)
}
output
mat_esti(data[,1:3],data[,4])
## Intercept beta.1 beta.2 beta.3
## 1 86.22502 -0.2030377 -1.072147 0.1452013
fit = lm(y~x1+x2+x3)
summary(fit) # check
##
## Call:
## lm(formula = y ~ x1 + x2 + x3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.178 -6.548 1.379 5.822 14.840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.22502 4.73472 18.211 < 2e-16 ***
## x1 -0.20304 0.07115 -2.854 0.00662 **
## x2 -1.07215 0.15580 -6.881 1.91e-08 ***
## x3 0.14520 0.03015 4.817 1.84e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.728 on 43 degrees of freedom
## Multiple R-squared: 0.6423, Adjusted R-squared: 0.6173
## F-statistic: 25.73 on 3 and 43 DF, p-value: 1.089e-09
发现结果一模一样,我们也算是掌握了lm函数的原理
其实强大的算法依靠的都是矩阵,原理是不变的。
掌握了算法背后的本质,才能写出更深更复杂的算法!
路漫漫其修远兮,加油!