First of all, here is the teacher's homepage; the code for this course is under the Teaching section on the left.
(From the "what do we even need you for" series.)
Newton's method finds an extremum, which is not necessarily the global optimum.
Before studying optimization we first learned plotting, so that the results of our code can be displayed and interleaved with the computations, which makes everything more intuitive.
(Homework: keep improving the previous assignment.)
x = c(25734,22375,21476,16042,15454,15360,15033,14427,10549,10544,
10501,10491,10147,10118,10016,9240,8862,8121,8030,7986,7927,7903,7712,7691,7021,
6848,6775,6558,6236,6186,5681)
data = x
mu = seq(5000,28000,100) # Grid of candidate mu values (useful for plotting the log-likelihood)
sigma2 = var(x) # Fix the variance at the sample variance
# Score function: first derivative of the normal log-likelihood with respect to mu
g = function(mu,data)
{
out = sum(data - mu)/sigma2
return(out)
}
# Derivative of the score (second derivative of the log-likelihood), constant in mu
dg = -length(x)/sigma2
nralgo1<-function(mu0,func,deri){
epsilon<-1E-2 #Tolerance level
MaxIter<-5000 #Maximum number of iterations
mu<-rep(NA,MaxIter+1) #A vector to store the sequence of mu
mu[1]<-mu0 # Initialise
data = x
for(n in 1:MaxIter){
mu[n+1]<-mu[n]-(func(mu[n],data)/deri)#Update step: Newton root-finding on the score
if(abs(func(mu[n+1],data))<=epsilon){break} #Stopping rule: score close to zero
}
if(n==MaxIter){warning('Maximum number of iterations reached without convergence')}
return(mu[n+1]) #Return the final value of mu
}
nralgo1(10000,g,dg)
## [1] 10872.39
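As mentioned above, plotting makes the optimization more intuitive. Here is a minimal sketch (my own addition, not the course code) that draws the log-likelihood over the mu grid and marks the value Newton converged to:
loglik = sapply(mu, function(m) sum(dnorm(data, mean = m, sd = sd(x), log = TRUE))) # Log-likelihood at each grid point
plot(mu, loglik, type = "l", xlab = "mu", ylab = "log-likelihood") # Curve over the grid
abline(v = nralgo1(10000, g, dg), lty = 2) # Vertical line at Newton's solution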
Now for the multivariate case, two ingredients to prepare (see the formulas just below):
- the vector of first-order partial derivatives of y with respect to x1,…,xd (the gradient)
- the matrix of second-order partial derivatives of y with respect to x1,…,xd (the Hessian)
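Concretely, for the example function used throughout this part (defined in code further below),

$$y = f(x_1, x_2) = x_1^2 - x_1 x_2 + x_2^2 + e^{x_2},$$

these two ingredients are

$$\nabla f = \begin{pmatrix} 2x_1 - x_2 \\ -x_1 + 2x_2 + e^{x_2} \end{pmatrix}, \qquad H = \begin{pmatrix} 2 & -1 \\ -1 & 2 + e^{x_2} \end{pmatrix}.$$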
# First-order partial derivatives (the gradient)
d1x1 = function(x1,x2){
out = 2*x1 - x2
return(out)
}
d1x2 = function(x1,x2){
out = -x1 + 2*x2 + exp(x2)
return(out)
}
dydx = function(x1,x2){
out = matrix(c(d1x1(x1,x2),d1x2(x1,x2)),nrow = 1,ncol = 2)
return(out)
}
dydx(2,3)
## [,1] [,2]
## [1,] 1 24.08554
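As a quick sanity check (my own addition, assuming the numDeriv package is installed), the hand-coded gradient can be compared against a numerical approximation:
library(numDeriv) # assumption: numDeriv is available
f_check = function(x) x[1]^2 - x[1]*x[2] + x[2]^2 + exp(x[2]) # same function, vector input
grad(f_check, c(2,3)) # should reproduce dydx(2,3): 1 and 24.08554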
# Second-order partial derivatives (the Hessian matrix)
d2ydx2 = function(x2)
{
out = matrix(NA,nrow = 2,ncol = 2)
out[1,1] = 2 # d2y/dx1^2
out[2,1] = -1 # d2y/dx2dx1
out[1,2] = -1 # d2y/dx1dx2
out[2,2] = exp(x2) + 2 # d2y/dx2^2
return(out)
}
d2ydx2(3)
##      [,1]     [,2]
## [1,]    2 -1.00000
## [2,]   -1 22.08554
## Verify the definiteness of the Hessian matrix
det(d2ydx2(3))
## [1] 43.17107
eigen(d2ydx2(3))
## eigen() decomposition
## $values
## [1] 22.135201  1.950336
##
## $vectors
##             [,1]        [,2]
## [1,] -0.04960313 -0.99876901
## [2,]  0.99876901 -0.04960313
# Both eigenvalues are positive, so the Hessian is positive definite
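An equivalent check (my own note, not from the course code): chol() throws an error unless its argument is positive definite, so a successful Cholesky factorisation tells the same story:
chol(d2ydx2(3)) # succeeds, hence positive definite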
Study the code below; then, using Newton's method, solve for the coefficients beta_i of a multiple linear regression (a sketch of that appears at the end of this section).
#There are two ways to code up the functions. Either with two
#scalar inputs or one vector input
#func<-function(x1,x2){x1^2-(x1*x2)+(x2^2)+exp(x2)}
#grad<-function(x1,x2){c(2*x1-x2,-x1+2*x2+exp(x2))}
#hess<-function(x1,x2){matrix(c(2,-1,-1,2+exp(x2)),2,2)}
func<-function(x){x[1]^2-(x[1]*x[2])+(x[2]^2)+exp(x[2])} #Function
grad<-function(x){c(2*x[1]-x[2],-x[1]+2*x[2]+exp(x[2]))} #Gradient
hess<-function(x){matrix(c(2,-1,-1,2+exp(x[2])),2,2)} #Hessian
#Function for Newton's Method
nralgo<-function(x0,f,g,h){
epsilon<-1E-10 #Tolerance
MaxIter<-500 #Maximum Iterations
n<-1 #Initialise n
xnew<-x0-solve(h(x0),g(x0)) #Initialise xnew
change<-abs(f(xnew)-f(x0)) #Initialise change in function value
while((n<=MaxIter)&&(change>epsilon)){ #The while loop breaks if either MaxIter is reached or the change in function value is less than the tolerance
n<-n+1 #Update n
xnew->xold #New becomes old. Pay attention to the direction of the arrow
xnew<-xold-solve(h(xold),g(xold)) #xnew is updated
change<-abs(f(xnew)-f(xold)) #Change in function value updated
}
return(xnew)
}
x0<-c(2,1) #Initial value
x_Newton<-nralgo(x0,func,grad,hess) #Solution using Newton
o_BFGS<-optim(x0,func,grad,method="BFGS")
x_BFGS<-o_BFGS$par
o_LBFGSB<-optim(x0,func,grad,method="L-BFGS-B")
x_LBFGSB<-o_LBFGSB$par
#Print out all answers. All three methods agree on the minimiser
print(paste('Newton Method:',x_Newton))
## [1] "Newton Method: -0.216281377766" "Newton Method: -0.432562755532"
print(paste(' BFGS Method:',x_BFGS))
## [1] " BFGS Method: -0.216281523717151" " BFGS Method: -0.432562410421663"
print(paste('L-BFGS-B Method:',x_LBFGSB))
## [1] "L-BFGS-B Method: -0.216281784611709"
## [2] "L-BFGS-B Method: -0.432563097230315"