Estimating beta hat (β̂)

y = b0 + b1×x1 + b2×x2 + … + bp×xp
How to estimate the coefficients bi then becomes an interesting question.
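
In matrix notation the same model reads as below, where X is the design matrix with a leading column of ones; the least-squares estimate is the coefficient vector that minimizes the squared-error criterion S, and that is exactly what both attempts in this post try to compute:

$$
y = Xb + \varepsilon, \qquad S(b) = (y - Xb)^{\top}(y - Xb), \qquad \hat{b} = \arg\min_{b} S(b)
$$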

1. a failing attempt

dataset

### setwd("C:/Users/wangyuanhe/Desktop")
attach(swiss)

x1 = swiss$Agriculture
x2 = swiss$Education
x3 = swiss$Catholic
y = swiss$Fertility

build the regression y ~ x1 + x2 + x3

# dd is 0
b = seq(0.001, 1, 0.001)  # b's range
b1_0 = -0.2

main function

est_b1_3 = function(b1_0,x1,x2,x3){
  
MaxIter = 500
tolerancelevel = 1E-3

b2_0 = -1
b3_0 = 0.1
m = matrix(NA,3,1)
x = matrix(NA,1,3)
b1 = matrix(NA,1,45); b2 = matrix(NA,1,45); b3 = matrix(NA,1,45)
m[1,1] = b1_0; m[2,1] = b2_0; m[3,1] = b3_0

a loop

for(i in 1:45){
  # b1, b2 and b3 are still all NA here, so this overwrites m with NA
  m[1,1] = b1[i]; m[2,1] = b2[i]; m[3,1] = b3[i]
  x[1,1] = x1[i]; x[1,2] = x2[i]; x[1,3] = x3[i]
  p = m
  # m is 3 x 1, not square, so solve(m, x) throws an error,
  # and its result is never assigned to anything anyway
  solve(m, x)

  if(i > 1){
    if(abs(p[i-1] - p[i]) <= tolerancelevel){break}
  }
}
if(i==MaxIter){warning('Maximum number of iterations reached without convergence')}

# b1 is never updated inside the loop, so this would return NA even without the error above
return(b1[i+1])

}

est_b1_3(b1_0, x1, x2, x3) # fails

This attempt fails: the coefficient vectors b1, b2 and b3 are never filled in, solve() is handed a non-square 3 x 1 matrix and stops with an error, and its result would be discarded anyway. Updating the coefficients element by element like this is not a valid iteration. The fix is to work with the full design matrix and the matrix form of the Newton update, which is what the next section does.

2. a successful example

data

attach(swiss)
x1 = swiss$Agriculture
x2 = swiss$Education
x3 = swiss$Catholic
y = swiss$Fertility
data = data.frame(x1 , x2, x3, y)

build the matrices (the math)

mat is the beta vector; everything here is built on matrix operations.

# objective function: residual sum of squares (y - m %*% mat)' (y - m %*% mat)
## m is the design matrix, y the response, mat the coefficient vector
f <- function(m = matrix(),
              y = vector(mode = 'numeric'),
              mat = vector(mode = 'numeric')){
  out <- t(y - m%*%mat)%*%(y - m%*%mat)
  return(out)
}
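
As a quick sanity check of f (a small sketch added here, not part of the original code): build the design matrix by hand and evaluate the residual sum of squares at an arbitrary trial vector. The names X and b_start, and the trial values, are purely illustrative.

X <- cbind(1, x1, x2, x3)   # design matrix with an intercept column
b_start <- c(80, 0, -1, 0)  # an arbitrary trial coefficient vector
f(X, y, b_start)            # residual sum of squares at b_start (a 1 x 1 matrix)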

# first-order derivative (gradient) of the objective
## m holds the independent variables (matrix, 3 predictor columns)
## y is the dependent variable (numeric vector)
d <- function(m = matrix(),
              y = vector(mode = 'numeric'),
              mat = vector(mode = 'numeric')){
  out <- -2*t(m)%*%y+2*t(m)%*%m%*%mat
  return(out)
}
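
To convince ourselves that d really is the gradient of f, here is a hedged numerical check (a sketch, not in the original post): compare it against a central finite-difference approximation. X is the design matrix built in the check above; b_test and eps are illustrative choices.

b_test <- c(80, -0.2, -1, 0.1)  # arbitrary test point
eps <- 1e-6
num_grad <- sapply(seq_along(b_test), function(j){
  e <- rep(0, length(b_test)); e[j] <- eps
  (f(X, y, b_test + e) - f(X, y, b_test - e)) / (2 * eps)  # central difference
})
cbind(analytic = as.numeric(d(X, y, b_test)), numeric = num_grad)  # the two columns should agree closely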

# Hessian (second-derivative) matrix
dd <- function(m = matrix()){
  out <- 2*t(m)%*%m
  return(out)
}
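
Because the objective is exactly quadratic in mat, dd does not depend on mat at all: the Hessian is constant. A useful consequence, sketched below with the same X as above (b_init is an illustrative name), is that a single Newton step from any starting point already lands on the least-squares solution, which is why the loop in the next function converges almost immediately.

b_init <- rep(0, ncol(X))                           # arbitrary starting point
b_one  <- b_init - solve(dd(X), d(X, y, b_init))    # one Newton step
# b_one already satisfies the normal equations t(X) %*% X %*% b = t(X) %*% y,
# so a second step would leave it unchanged (up to rounding)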

the main function that puts everything together

The steps of the Newton iteration (the update rule is written out below):

build the main function and the iteration loop

pay close attention to how the solve() function is used (there is a note on this right after the function)
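
Written out, the update that the loop below implements is the usual Newton step for minimizing S(b), with the gradient g coming from d and the (constant) Hessian H from dd:

$$
b^{(k+1)} = b^{(k)} - H^{-1}\, g\big(b^{(k)}\big), \qquad g(b) = -2X^{\top}y + 2X^{\top}Xb, \qquad H = 2X^{\top}X
$$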

mat_esti <- function(m, y){
  MaxIter = 30 # maximum number of Newton iterations
  m <- as.matrix(m)
  y <- as.matrix(y)
  
  m <- cbind(rep(1,length(m[,1])), m) # add a column of ones for the intercept
  mat <- seq(1,length(m[1,]))         # starting values for the coefficients
  mat1 <- matrix(mat,1,length(mat))   # mat1 stores every iterate, one row per step
  
  # Newton
  for(i in 1:MaxIter){
    # d() returns the gradient (a column vector) and dd() the Hessian matrix
    # solve(a, b) returns x such that a %*% x = b
    mat <- mat - solve(dd(m), d(m, y, mat)) 
    mat1 <- rbind(mat1, matrix(mat,1,length(mat)))
    if(sum((mat1[i+1,] - mat1[i, ])^2) <1e-6){break}
  }
  # every intermediate beta vector is stored in the rows of mat1
  #
  # set column names and return a tidier output
  names <- c('Intercept', 'beta 1', 'beta 2', 'beta 3')
  dim(mat) <- c(1, length(mat))
  best_mat <- as.data.frame(mat)
  colnames(best_mat) <- names
  out <- data.frame(best_mat)
  return(out)
}
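
One design choice worth spelling out, since the notes above say to pay attention to solve(): solve(dd(m), d(m, y, mat)) solves the linear system H %*% x = g directly rather than forming the explicit inverse with solve(dd(m)) and multiplying. Both give the same step, but solving the system is cheaper and numerically more stable. A small sketch (X as built earlier, b_init illustrative):

b_init <- rep(0, ncol(X))
step_solve  <- solve(dd(X), d(X, y, b_init))    # solve H %*% x = g directly (preferred)
step_invert <- solve(dd(X)) %*% d(X, y, b_init) # explicit inverse, then multiply
all.equal(as.numeric(step_solve), as.numeric(step_invert))  # should report TRUE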

output

mat_esti(data[,1:3],data[,4])
##   Intercept     beta.1    beta.2    beta.3
## 1  86.22502 -0.2030377 -1.072147 0.1452013
fit = lm(y~x1+x2+x3)
summary(fit) # check
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.178  -6.548   1.379   5.822  14.840 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 86.22502    4.73472  18.211  < 2e-16 ***
## x1          -0.20304    0.07115  -2.854  0.00662 ** 
## x2          -1.07215    0.15580  -6.881 1.91e-08 ***
## x3           0.14520    0.03015   4.817 1.84e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.728 on 43 degrees of freedom
## Multiple R-squared:  0.6423, Adjusted R-squared:  0.6173 
## F-statistic: 25.73 on 3 and 43 DF,  p-value: 1.089e-09
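
To check the agreement programmatically rather than by eye, we can also compare the two coefficient vectors directly (a small sketch; the tolerance is an illustrative choice):

manual  <- unname(unlist(mat_esti(data[, 1:3], data[, 4])))  # Newton estimates as a plain vector
builtin <- unname(coef(fit))                                 # coefficients from lm
all.equal(manual, builtin, tolerance = 1e-6)                 # should report TRUE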

The results are exactly the same, so we can say we now understand the principle behind the lm function.
In fact, powerful algorithms all rest on matrices; the underlying principle does not change.
Only by grasping the essence behind an algorithm can we go on to write deeper and more complex ones.
The road ahead is long; keep going!