ase/optimize/fmin_bfgs.py

#__docformat__ = "restructuredtext en"
# ******NOTICE***************
# optimize.py module by Travis E. Oliphant
#
# You may copy and use this module as you see fit with no
# guarantee implied provided you keep this notice in all copies.
# *****END NOTICE************

import numpy
from numpy import atleast_1d, eye, mgrid, argmin, zeros, shape, empty, \
     squeeze, vectorize, asarray, absolute, sqrt, Inf, asfarray, isinf
from ase.utils.linesearch import LineSearch

# These have been copied from Numeric's MLab.py
# I don't think they made the transition to scipy_core

# Copied and modified from scipy_optimize
abs = absolute
import __builtin__
pymin = __builtin__.min
pymax = __builtin__.max
__version__ = "0.7"
_epsilon = sqrt(numpy.finfo(float).eps)

def fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
              epsilon=_epsilon, maxiter=None, full_output=0, disp=1,
              retall=0, callback=None, maxstep=0.2):
    """Minimize a function using the BFGS algorithm.

    Parameters:

      f : callable f(x,*args)
          Objective function to be minimized.
      x0 : ndarray
          Initial guess.
      fprime : callable f'(x,*args)
          Gradient of f.
      args : tuple
          Extra arguments passed to f and fprime.
      gtol : float
          Gradient norm must be less than gtol before successful termination.
      norm : float
          Order of norm (Inf is max, -Inf is min).
      epsilon : float or ndarray
          If fprime is approximated, use this value for the step size.
      callback : callable
          An optional user-supplied function to call after each
          iteration.  Called as callback(xk), where xk is the
          current parameter vector.

    Returns: (xopt, {fopt, gopt, Hopt, func_calls, grad_calls, warnflag}, <allvecs>)

        xopt : ndarray
            Parameters which minimize f, i.e. f(xopt) == fopt.
        fopt : float
            Minimum value.
        gopt : ndarray
            Value of the gradient at the minimum, f'(xopt), which should be
            near 0.
        Hopt : ndarray
            Value of 1/f''(xopt), i.e. the inverse Hessian matrix.
        func_calls : int
            Number of function calls made.
        grad_calls : int
            Number of gradient calls made.
        warnflag : integer
            1 : Maximum number of iterations exceeded.
            2 : Gradient and/or function calls not changing.
        allvecs : list
            Results at each iteration.  Only returned if retall is True.

    *Other Parameters*:
        maxiter : int
            Maximum number of iterations to perform.
        full_output : bool
            If True, return fopt, func_calls, grad_calls, and warnflag
            in addition to xopt.
        disp : bool
            Print convergence message if True.
        retall : bool
            Return a list of results at each iteration if True.

    Notes:

        Optimize the function f, whose gradient is given by fprime,
        using the quasi-Newton method of Broyden, Fletcher, Goldfarb,
        and Shanno (BFGS).  See Wright and Nocedal, 'Numerical
        Optimization', 1999, p. 198.

    *See Also*:

      scikits.openopt : SciKit which offers a unified syntax to call
                        this and other solvers.

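    *Example*:

        An illustrative sketch (not part of the original documentation):
        minimize a simple quadratic, letting the gradient be approximated
        by finite differences.

        >>> def f(x):
        ...     return (x[0] - 1.0)**2 + (x[1] + 2.5)**2
        >>> xopt = fmin_bfgs(f, [0.0, 0.0], disp=0)
        >>> # xopt should end up close to [1.0, -2.5]
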
    """
95
    x0 = asarray(x0).squeeze()
96
    if x0.ndim == 0:
97
        x0.shape = (1,)
98
    if maxiter is None:
99
        maxiter = len(x0)*200
100
    func_calls, f = wrap_function(f, args)
101
    if fprime is None:
102
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
103
    else:
104
        grad_calls, myfprime = wrap_function(fprime, args)
105
    gfk = myfprime(x0)
106
    k = 0
107
    N = len(x0)
108
    I = numpy.eye(N,dtype=int)
109
    Hk = I
110
    old_fval = f(x0)
111
    old_old_fval = old_fval + 5000
112
    xk = x0
113
    if retall:
114
        allvecs = [x0]
115
    sk = [2*gtol]
116
    warnflag = 0
117
    gnorm = vecnorm(gfk,ord=norm)
118
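    # Main BFGS loop: step along the quasi-Newton direction pk = -Hk*gfk
    # chosen by a Wolfe line search, then update the inverse Hessian Hk.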
    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk,gfk)
        ls = LineSearch()
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
           ls._line_search(f,myfprime,xk,pk,gfk,
                                  old_fval,old_old_fval,maxstep=maxstep)
        if alpha_k is None:  # line search failed; try a different one.
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                     line_search(f,myfprime,xk,pk,gfk,
                                 old_fval,old_old_fval)
            if alpha_k is None:
                # This line search also failed to find a better solution.
                warnflag = 2
                break
        xkp1 = xk + alpha_k * pk
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1
        gnorm = vecnorm(gfk,ord=norm)
        if (gnorm <= gtol):
            break

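        # Update the inverse Hessian approximation with the BFGS formula
        # (see Wright and Nocedal, 'Numerical Optimization', 1999):
        #     Hk <- (I - rhok*sk*yk^T) Hk (I - rhok*yk*sk^T) + rhok*sk*sk^T
        # where rhok = 1/(yk^T sk); the guards below keep rhok finite when
        # yk^T sk is numerically zero.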
        try:  # this was handled in Numeric; keep it here for extra safety
            rhok = 1.0 / (numpy.dot(yk,sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        if isinf(rhok):  # numpy returns inf on divide-by-zero instead of raising
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        A1 = I - sk[:,numpy.newaxis] * yk[numpy.newaxis,:] * rhok
        A2 = I - yk[:,numpy.newaxis] * sk[numpy.newaxis,:] * rhok
        Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \
                 * sk[numpy.newaxis,:]

    if disp or full_output:
        fval = old_fval
    if warnflag == 2:
        if disp:
            print "Warning: Desired error not necessarily achieved " \
                  "due to precision loss"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]

    elif k >= maxiter:
        warnflag = 1
        if disp:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    else:
        if disp:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]

    if full_output:
        retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = xk
        if retall:
            retlist = (xk, allvecs)

    return retlist

def vecnorm(x, ord=2):
    if ord == Inf:
        return numpy.amax(abs(x))
    elif ord == -Inf:
        return numpy.amin(abs(x))
    else:
        return numpy.sum(abs(x)**ord,axis=0)**(1.0/ord)

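# Wrap a callable so every evaluation is counted: the count lives in the
# mutable one-element list ncalls, which the enclosed wrapper can update.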
def wrap_function(function, args):
    ncalls = [0]
    def function_wrapper(x):
        ncalls[0] += 1
        return function(x, *args)
    return ncalls, function_wrapper

def _cubicmin(a,fa,fpa,b,fb,c,fc):
    # finds the minimizer for a cubic polynomial that goes through the
    #  points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
    #
    # if no minimizer can be found return None
    #
    # f(x) = A*(x-a)^3 + B*(x-a)^2 + C*(x-a) + D

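    # C and D are fixed by the value and slope at a; A and B then follow from
    # requiring f to pass through (b,fb) and (c,fc).  The 2x2 solve below is
    # that linear system written out, and xmin is the root of
    # f'(x) = 3*A*(x-a)^2 + 2*B*(x-a) + C that yields a minimum.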
    C = fpa
    D = fa
    db = b-a
    dc = c-a
    if (db == 0) or (dc == 0) or (b == c): return None
    denom = (db*dc)**2 * (db-dc)
    d1 = empty((2,2))
    d1[0,0] = dc**2
    d1[0,1] = -db**2
    d1[1,0] = -dc**3
    d1[1,1] = db**3
    [A,B] = numpy.dot(d1, asarray([fb-fa-C*db, fc-fa-C*dc]).flatten())
    A /= denom
    B /= denom
    radical = B*B - 3*A*C
    if radical < 0: return None
    if (A == 0): return None
    xmin = a + (-B + sqrt(radical))/(3*A)
    return xmin

def _quadmin(a,fa,fpa,b,fb):
    # finds the minimizer for a quadratic polynomial that goes through
    #  the points (a,fa), (b,fb) with derivative at a of fpa
    # f(x) = B*(x-a)^2 + C*(x-a) + D
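    # Setting f'(x) = 2*B*(x-a) + C = 0 gives xmin = a - C/(2.0*B); B > 0 is
    # required for the stationary point to be a minimum.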
    D = fa
    C = fpa
    db = b - a*1.0
    if (db == 0): return None
    B = (fb - D - C*db)/(db*db)
    if (B <= 0): return None
    xmin = a - C / (2.0*B)
    return xmin

def zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
         phi, derphi, phi0, derphi0, c1, c2):
    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while 1:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # Try cubic interpolation first; if the result is within
        # delta * dalpha of an end point, or outside the interval bounded
        # by a_lo and a_hi, fall back to quadratic interpolation, and if
        # that is still too close to an end point, use bisection.

        dalpha = a_hi - a_lo
        if dalpha < 0: a,b = a_hi,a_lo
        else: a,b = a_lo,a_hi

        # minimizer of the cubic interpolant
        #    (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #      if the result is too close to the end points (or out of the interval)
        #         then use quadratic interpolation with phi_lo, derphi_lo and phi_hi
        #      if the result is still too close to the end points (or out of the interval)
        #         then use bisection

        if (i > 0):
            cchk = delta1*dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b-cchk) or (a_j < a+cchk):
            qchk = delta2*dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
                a_j = a_lo + 0.5*dalpha
#                print "Using bisection."
#            else: print "Using quadratic."
#        else: print "Using cubic."

        # Check new value of a_j
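        # Sufficient-decrease (Armijo) test first: if a_j fails it, or does
        # not improve on phi_lo, it becomes the new upper end of the
        # interval; otherwise a_j is accepted once the strong curvature
        # condition |phi'(a_j)| <= -c2*phi'(0) also holds.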
        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0:
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            a_star = a_j
            val_star = phi_aj
            valprime_star = None
            break
    return a_star, val_star, valprime_star

def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
                args=(), c1=1e-4, c2=0.9, amax=50):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters:

        f : callable f(x,*args)
            Objective function.
        myfprime : callable f'(x,*args)
            Objective function gradient (can be None).
        xk : ndarray
            Starting point.
        pk : ndarray
            Search direction.
        gfk : ndarray
            Gradient value for x=xk (xk being the current parameter
            estimate).
        args : tuple
            Additional arguments passed to objective function.
        c1 : float
            Parameter for Armijo condition rule.
        c2 : float
            Parameter for curvature condition rule.

    Returns:

        alpha0 : float
            Alpha for which ``x_new = x0 + alpha * pk``.
        fc : int
            Number of function evaluations made.
        gc : int
            Number of gradient evaluations made.

    Notes:

        Uses the line search algorithm to enforce strong Wolfe
        conditions.  See Wright and Nocedal, 'Numerical Optimization',
        1999, pp. 59-60.

        For the zoom phase it uses an algorithm by [...].

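    *Example*:

        An illustrative sketch (not part of the original documentation):
        search along the steepest-descent direction of a simple quadratic;
        the exact minimizer along pk is alpha = 0.5.  The last argument
        mirrors the ``old_fval + 5000`` initialization used by fmin_bfgs.

        >>> f = lambda x: numpy.dot(x, x)
        >>> fp = lambda x: 2*x
        >>> xk = numpy.array([1.0, 1.0])
        >>> pk = -fp(xk)
        >>> alpha, fc, gc, fval, old_fval, g = line_search(
        ...     f, fp, xk, pk, fp(xk), f(xk), f(xk) + 5000)
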
    """
369

    
370
    global _ls_fc, _ls_gc, _ls_ingfk
371
    _ls_fc = 0
372
    _ls_gc = 0
373
    _ls_ingfk = None
374
    def phi(alpha):
375
        global _ls_fc
376
        _ls_fc += 1
377
        return f(xk+alpha*pk,*args)
378

    
379
    if isinstance(myfprime,type(())):
380
        def phiprime(alpha):
381
            global _ls_fc, _ls_ingfk
382
            _ls_fc += len(xk)+1
383
            eps = myfprime[1]
384
            fprime = myfprime[0]
385
            newargs = (f,eps) + args
386
            _ls_ingfk = fprime(xk+alpha*pk,*newargs)  # store for later use
387
            return numpy.dot(_ls_ingfk,pk)
388
    else:
389
        fprime = myfprime
390
        def phiprime(alpha):
391
            global _ls_gc, _ls_ingfk
392
            _ls_gc += 1
393
            _ls_ingfk = fprime(xk+alpha*pk,*args)  # store for later use
394
            return numpy.dot(_ls_ingfk,pk)
395

    
396
    alpha0 = 0
397
    phi0 = old_fval
398
    derphi0 = numpy.dot(gfk,pk)
399

    
400
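    # Initial trial step: pick alpha1 so the expected decrease matches the
    # decrease obtained in the previous outer iteration,
    # alpha1 ~ 2*(phi0 - old_old_fval)/derphi0, capped at 1.0 (cf. the
    # first-step rule suggested by Wright and Nocedal).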
    alpha1 = pymin(1.0, 1.01*2*(phi0-old_old_fval)/derphi0)

    if alpha1 == 0:
        # This shouldn't happen. Perhaps the increment has slipped below
        # machine precision?  For now, set the return variables, skip the
        # useless while loop, and raise warnflag=2 due to possible imprecision.
        alpha_star = None
        fval_star = old_fval
        old_fval = old_old_fval
        fprime_star = None

    phi_a1 = phi(alpha1)
    #derphi_a1 = phiprime(alpha1)  evaluated below

    phi_a0 = phi0
    derphi_a0 = derphi0

    i = 1
    maxiter = 10
    while 1:         # bracketing phase
        if alpha1 == 0:
            break
        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 1)):
            alpha_star, fval_star, fprime_star = \
                        zoom(alpha0, alpha1, phi_a0,
                             phi_a1, derphi_a0, phi, phiprime,
                             phi0, derphi0, c1, c2)
            break

        derphi_a1 = phiprime(alpha1)
        if (abs(derphi_a1) <= -c2*derphi0):
            alpha_star = alpha1
            fval_star = phi_a1
            fprime_star = derphi_a1
            break

        if (derphi_a1 >= 0):
            alpha_star, fval_star, fprime_star = \
                        zoom(alpha1, alpha0, phi_a1,
                             phi_a0, derphi_a1, phi, phiprime,
                             phi0, derphi0, c1, c2)
            break

        alpha2 = 2 * alpha1   # increase by factor of two on each iteration
        i = i + 1
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi(alpha1)
        derphi_a0 = derphi_a1

        # stopping test: give up if a lower function value was not found
        if (i > maxiter):
            alpha_star = alpha1
            fval_star = phi_a1
            fprime_star = None
            break

    if fprime_star is not None:
        # fprime_star is a number (derphi), so replace it with the most
        # recently computed gradient: this is the gradient at the next step,
        # so there is no need to compute it again in the outer loop.
        fprime_star = _ls_ingfk

    return alpha_star, _ls_fc, _ls_gc, fval_star, old_fval, fprime_star

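# Forward-difference approximation of the gradient:
#     grad[k] ~ (f(xk + epsilon*e_k) - f(xk)) / epsilon
# with e_k the k-th unit vector; costs len(xk) + 1 function evaluations.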
def approx_fprime(xk,f,epsilon,*args):
    f0 = f(*((xk,)+args))
    grad = numpy.zeros((len(xk),), float)
    ei = numpy.zeros((len(xk),), float)
    for k in range(len(xk)):
        ei[k] = epsilon
        grad[k] = (f(*((xk+ei,)+args)) - f0)/epsilon
        ei[k] = 0.0
    return grad