First of all, here is the instructor's homepage; the code for this course can be found under the "Teaching" section on the left-hand side.

(the "what do we even need you for" series)

In the last session we covered:

Newton's iteration for finding an extremum (not necessarily the global optimum).

Before tackling optimization we first learned plotting, so that the results of the code can be displayed and plots can be interleaved with the optimization, which makes everything more intuitive.

(Homework: continue refining last week's assignment.)

x = c(25734,22375,21476,16042,15454,15360,15033,14427,10549,10544,
      10501,10491,10147,10118,10016,9240,8862,8121,8030,7986,7927,7903,7712,7691,7021,
      6848,6775,6558,6236,6186,5681)
data = x

mu = seq(5000,28000,100) # grid of candidate mu values
sigma = var(x) # sample variance of the data (note: this is the variance, not the standard deviation)

# Log-likelihood of the data under a normal model.
# (As written, mean(x) and var(x) are hard-coded, so the mu argument is never used
#  and a variance is passed where dnorm expects a standard deviation; a corrected
#  Newton sketch is given below the output.)
g = function(mu,data)
{
  out = sum(dnorm(data, mean(x), sd = var(x),log = TRUE))
  return(out)
}

nralgo1<-function(mu0,func,deri){
  epsilon<-1E-2 #Set tolerance level
  MaxIter<-5000 #Maximum number of iterations
  mu<-rep(NA,MaxIter+1) #A vector to store the sequence of mu values
  mu[1]<-mu0 #Initialise
  data = x
  xplot = matrix(NA,nrow = MaxIter+1) #Storage intended for plotting (not used below)
 
  for(n in 1:MaxIter){
    mu[n+1]<-mu[n]-(func(mu[n],data)/deri) #Update step
    
    if(abs(func(mu[n+1],data)-func(mean(x),data))<=epsilon){break} #Stopping rule: compare against the value at the sample mean
  }
  
  if(n==MaxIter){warning('Maximum number of iterations reached without convergence')}
  
  return(mu[n+1]) #Return the final value of mu
}

nralgo1(10000,g,1)
## [1] 10557.4
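Note that the attempt above passes the log-likelihood itself as func and the constant 1 as deri, so the update mu - func(mu)/deri is not an actual Newton step. Below is a minimal corrected sketch for the one-dimensional case; it is my own illustration (the helper names score, d2loglik and newton1d are not from the course code) and uses the score (first derivative) and the second derivative of the normal log-likelihood in mu, with sigma fixed at the sample standard deviation.

# Illustrative sketch (helper names are not from the course code)
score    = function(mu, data){ sum(data - mu) / sd(data)^2 } # first derivative of the log-likelihood in mu
d2loglik = function(mu, data){ -length(data) / sd(data)^2 }  # second derivative (constant in mu)

newton1d = function(mu0, data, tol = 1e-8, maxit = 100){
  mu = mu0
  for(n in 1:maxit){
    step = score(mu, data) / d2loglik(mu, data) # Newton step: f'(mu)/f''(mu)
    mu = mu - step
    if(abs(step) < tol){break}                  # stop once the step is negligible
  }
  return(mu)
}

newton1d(10000, x) # lands on mean(x) after a single step, since the log-likelihood is quadratic in mu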

How do we extend Newton's iteration to higher dimensions?

Two ingredients are needed:
- the first-order partial derivatives of y with respect to x1,...,xd (the gradient);
- the second-order partial derivatives of y with respect to x1,...,xd (the Hessian matrix).

With these, the update step becomes x_new = x_old - H(x_old)^(-1) * grad(x_old), which in R is written x_old - solve(hess(x_old), grad(x_old)), exactly the line used in the code further below.

# First-order partial derivatives (the gradient, stored as a 1x2 matrix)
d1x1 = function(x1,x2){
  out = 2*x1 - x2
  return(out)
}
d1x2 = function(x1,x2){
  out = -x1 + 2*x2 + exp(x2)
  return(out)
}
dydx = function(x1,x2){
  out = matrix(c(d1x1(x1,x2),d1x2(x1,x2)),nrow = 1,ncol = 2)
  return(out)
}
dydx(2,3)
##      [,1]     [,2]
## [1,]    1 24.08554
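As a quick sanity check on the hand-coded gradient, it can be compared against central finite differences of the objective f(x1,x2) = x1^2 - x1*x2 + x2^2 + exp(x2) (the function is written out explicitly further below). The helper fd_grad, the name f_check and the step size h are illustrative choices of mine, not part of the course code.

# Central-difference gradient check (illustrative helper, not course code)
fd_grad = function(f, x, h = 1e-6){
  sapply(seq_along(x), function(i){
    e = rep(0, length(x)); e[i] = h
    (f(x + e) - f(x - e)) / (2 * h)
  })
}

f_check = function(x){ x[1]^2 - x[1]*x[2] + x[2]^2 + exp(x[2]) }
fd_grad(f_check, c(2, 3)) # should be close to dydx(2,3), i.e. roughly (1, 24.0855)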
# Second-order partial derivatives (the Hessian matrix)

d2ydx2 = function(x)
{
  d2x1x1 = 2
  d2x1x2 = -1
  d2x2x1 = -1
  d2x2x2 = function(x2){
    out = exp(x2) + 2
    return(out)
  }
  out = matrix(4,nrow = 2,ncol = 2) # initialise a 2x2 matrix (the filler value 4 is overwritten below)
  out[1] = 1 # note: hard-coded as 1 here, even though d2x1x1 above (and hess() later in this post) gives 2
  out[2] = -1
  out[3] = -1
  out[4] = d2x2x2(x)
  return(out)
}

d2ydx2(3)
##      [,1]     [,2]
## [1,]    1 -1.00000
## [2,]   -1 22.08554
# Check the Hessian via its determinant and eigenvalues (is it positive definite?)

det(d2ydx2(3))
## [1] 21.08554
eigen(d2ydx2(3))
## eigen() decomposition
## $values
## [1] 22.1328566  0.9526803
## 
## $vectors
##             [,1]        [,2]
## [1,] -0.04726679 -0.99888230
## [2,]  0.99888230 -0.04726679
# Both eigenvalues are greater than zero, so the Hessian is positive definite
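The same positive-definiteness check can be wrapped in a small helper so it can be reused at any candidate point; is_pos_def below is an illustrative name of mine, not part of the course code.

# TRUE if all eigenvalues of a (symmetric) matrix are strictly positive (illustrative helper)
is_pos_def = function(H){
  all(eigen(H, only.values = TRUE)$values > 0)
}
is_pos_def(d2ydx2(3)) # TRUE, consistent with the eigenvalue check above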

Today's task:

Study the code below, then use Newton's iteration to find the coefficients beta_i of a multiple linear regression (a starting-point sketch is given at the end of this post).

#There are two ways to code up the functions.  Either with two
#scalar inputs or one vector input

#func<-function(x1,x2){x1^2-(x1*x2)+(x2^2)+exp(x2)}
#grad<-function(x1,x2){c(2*x1-x2,-x1+2*x2+exp(x2))}
#hess<-function(x1,x2){matrix(c(2,-1,-1,2+exp(x2)),2,2)}



func<-function(x){x[1]^2-(x[1]*x[2])+(x[2]^2)+exp(x[2])} #Function
grad<-function(x){c(2*x[1]-x[2],-x[1]+2*x[2]+exp(x[2]))} #Gradient
hess<-function(x){matrix(c(2,-1,-1,2+exp(x[2])),2,2)} #Hessian
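
As a quick consistency check, the vector-input grad should reproduce the values of the 1x2 matrix returned by dydx earlier:

grad(c(2,3)) # c(1, 24.0855...), matching dydx(2,3) above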

#Function for Newton's Method

nralgo<-function(x0,f,g,h){
  epsilon<-1E-10 #Tolerance
  MaxIter<-500 #Maximum Iterations
  n<-1 #Initialise n
  xnew<-x0-solve(h(x0),g(x0)) #Initialise xnew
  change<-abs(f(xnew)-f(x0)) #Initialise change in function value
  while((n<=MaxIter)&&(change>epsilon)){ #Note: the while loop stops if either MaxIter is reached or the change in function value falls below the tolerance
    n<-n+1 #Update n
    xnew->xold #New becomes old. Pay attention to the direction of the arrow
    xnew<-xold-solve(h(xold),g(xold)) #xnew is updated
    change<-abs(f(xnew)-f(xold)) #Change in function value updated
  }
  return(xnew)
}

x0<-c(2,1) #Initial value
x_Newton<-nralgo(x0,func,grad,hess) #Solution using Newton
o_BFGS<-optim(x0,func,grad,method="BFGS")
x_BFGS<-o_BFGS$par
o_LBFGSB<-optim(x0,func,grad,method="L-BFGS-B")
x_LBFGSB<-o_LBFGSB$par

#Print out all answers. All three methods find essentially the same minimiser
print(paste('Newton Method:',x_Newton))
## [1] "Newton Method: -0.216281377766" "Newton Method: -0.432562755532"
print(paste(' BFGS Method:',x_BFGS))
## [1] " BFGS Method: -0.216281523717151" " BFGS Method: -0.432562410421663"
print(paste('L-BFGS-B Method:',x_LBFGSB))
## [1] "L-BFGS-B Method: -0.216281784611709"
## [2] "L-BFGS-B Method: -0.432563097230315"