ase/optimize/lbfgs.py @ 5
# -*- coding: utf-8 -*-
import sys
import numpy as np
from ase.optimize.optimize import Optimizer
from ase.utils.linesearch import LineSearch

class LBFGS(Optimizer):
    """Limited memory BFGS optimizer.

    A limited-memory version of the BFGS algorithm.  Unlike the BFGS
    algorithm used in bfgs.py, this optimizer updates the inverse of the
    Hessian matrix.  The inverse Hessian is represented only as a diagonal
    matrix, to save memory.
    """
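    # A minimal usage sketch (illustrative only; the EMT calculator and
    # the N2 molecule are stand-ins for whatever system is optimized):
    #
    #     from ase import Atoms
    #     from ase.calculators.emt import EMT
    #     from ase.optimize.lbfgs import LBFGS
    #
    #     atoms = Atoms('N2', positions=[(0, 0, 0), (0, 0, 1.2)])
    #     atoms.set_calculator(EMT())
    #     opt = LBFGS(atoms, trajectory='n2.traj')
    #     opt.run(fmax=0.05)  # relax until all forces are below 0.05 eV/A
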
    def __init__(self, atoms, restart=None, logfile='-', trajectory=None,
                 maxstep=None, memory=100, damping=1.0, alpha=10.0,
                 use_line_search=False):
        """
        Parameters:

        restart: string
            Pickle file used to store vectors for updating the inverse
            Hessian matrix.  If set, a file with that name will be
            searched for, and the information stored in it will be used,
            if the file exists.

        logfile: string
            Where output should go.  None for no output, '-' for stdout.

        trajectory: string
            Pickle file used to store the trajectory of atomic movement.

        maxstep: float
            How far a single atom is allowed to move.  This is useful for
            DFT calculations, where wavefunctions can be reused if the
            steps are small.  Default is 0.04 Angstrom.

        memory: int
            Number of steps to be stored.  Default value is 100.  Three
            numpy arrays of this length containing floats are stored.

        damping: float
            The calculated step is multiplied by this number before it is
            added to the positions.

        alpha: float
            Initial guess for the Hessian (curvature of the energy
            surface).  A conservative value of 70.0 emulates the
            behaviour of BFGS; the default here is 10.0.  Fewer steps may
            be needed to converge with a lower value, but a lower value
            also means a risk of instability.
        """
        Optimizer.__init__(self, atoms, restart, logfile, trajectory)

        if maxstep is not None:
            if maxstep > 1.0:
                raise ValueError('You are using a much too large value for '
                                 'the maximum step size: %.1f Angstrom' %
                                 maxstep)
            self.maxstep = maxstep
        else:
            self.maxstep = 0.04

        self.memory = memory
        self.H0 = 1. / alpha  # Initial approximation of the inverse
                              # Hessian; 1./70. emulates the behaviour of
                              # BFGS.  Note that this is never changed!
        self.damping = damping
        self.use_line_search = use_line_search
        self.p = None
        self.function_calls = 0
        self.force_calls = 0

    def initialize(self):
        """Initialize everything so no checks have to be done in step."""
        self.iteration = 0
        self.s = []
        self.y = []
        self.rho = []  # Also store rho, to avoid calculating the dot
                       # product again and again.

        self.r0 = None
        self.f0 = None
        self.e0 = None
        self.task = 'START'
        self.load_restart = False

    def read(self):
        """Load saved arrays to reconstruct the Hessian."""
        self.iteration, self.s, self.y, self.rho, \
            self.r0, self.f0, self.e0, self.task = self.load()
        self.load_restart = True

    def step(self, f):
        """Take a single step.

        Use the given forces, update the history and calculate the next
        step -- then take it."""
        r = self.atoms.get_positions()
        p0 = self.p

        self.update(r, f, self.r0, self.f0)

        s = self.s
        y = self.y
        rho = self.rho
        H0 = self.H0

        loopmax = np.min([self.memory, self.iteration])
        a = np.empty((loopmax,), dtype=np.float64)

        ### The algorithm itself:
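        # This is the standard L-BFGS two-loop recursion: using the
        # stored displacements s[i], gradient changes y[i] and the
        # precomputed factors rho[i] = 1/(y[i].s[i]), it applies the
        # implicit inverse Hessian to the gradient without ever building
        # the full 3N x 3N matrix.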
        q = -f.reshape(-1)
        for i in range(loopmax - 1, -1, -1):
            a[i] = rho[i] * np.dot(s[i], q)
            q -= a[i] * y[i]
        z = H0 * q

        for i in range(loopmax):
            b = rho[i] * np.dot(y[i], z)
            z += s[i] * (a[i] - b)

        self.p = -z.reshape((-1, 3))
        ###

        g = -f
        if self.use_line_search:
            e = self.func(r)
            self.line_search(r, g, e)
            dr = (self.alpha_k * self.p).reshape(len(self.atoms), -1)
        else:
            self.force_calls += 1
            self.function_calls += 1
            dr = self.determine_step(self.p) * self.damping
        self.atoms.set_positions(r + dr)

        self.iteration += 1
        self.r0 = r
        self.f0 = -g
        self.dump((self.iteration, self.s, self.y,
                   self.rho, self.r0, self.f0, self.e0, self.task))

    def determine_step(self, dr):
        """Determine the step to take according to maxstep.

        Scale all steps by the largest one, so that no atom moves further
        than maxstep.  This way we still move along the eigendirection.
        """
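        # For example, with maxstep=0.04: if the largest atomic
        # displacement in dr is 0.08, every row of dr is scaled by 0.5,
        # so the direction of the step is preserved.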
        steplengths = (dr**2).sum(1)**0.5
        longest_step = np.max(steplengths)
        if longest_step >= self.maxstep:
            dr *= self.maxstep / longest_step

        return dr

    def update(self, r, f, r0, f0):
        """Update everything that is kept in memory.

        This function is mostly here to allow for replay_trajectory.
        """
        if self.iteration > 0:
            s0 = r.reshape(-1) - r0.reshape(-1)
            self.s.append(s0)

            # We use the gradient, which is minus the force!
            y0 = f0.reshape(-1) - f.reshape(-1)
            self.y.append(y0)

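            # BFGS theory requires the curvature condition
            # np.dot(y0, s0) > 0 for the update to stay positive
            # definite; if a step makes this product approach zero, the
            # rho factor below diverges and the update can become
            # unstable.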
            rho0 = 1.0 / np.dot(y0, s0)
            self.rho.append(rho0)

        if self.iteration > self.memory:
            self.s.pop(0)
            self.y.pop(0)
            self.rho.pop(0)

    def replay_trajectory(self, traj):
        """Initialize the history from an old trajectory."""
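        # Typical use, before calling run() (the file name is just an
        # illustrative example):
        #
        #     opt.replay_trajectory('previous_run.traj')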
        if isinstance(traj, str):
            from ase.io.trajectory import PickleTrajectory
            traj = PickleTrajectory(traj, 'r')
        r0 = None
        f0 = None
        # The last element is not added, as we get that for free when
        # taking the first quasi-Newton step after the replay.
        for i in range(0, len(traj) - 1):
            r = traj[i].get_positions()
            f = traj[i].get_forces()
            self.update(r, f, r0, f0)
            r0 = r.copy()
            f0 = f.copy()
            self.iteration += 1
        self.r0 = r0
        self.f0 = f0

    def func(self, x):
        """Objective function for use by the optimizers."""
        self.atoms.set_positions(x.reshape(-1, 3))
        self.function_calls += 1
        return self.atoms.get_potential_energy()

    def fprime(self, x):
        """Gradient of the objective function for use by the optimizers."""
        self.atoms.set_positions(x.reshape(-1, 3))
        self.force_calls += 1
        # Remember that the forces are minus the gradient!
        return -self.atoms.get_forces().reshape(-1)

    def line_search(self, r, g, e):
        self.p = self.p.ravel()
        p_size = np.sqrt((self.p**2).sum())
        if p_size <= np.sqrt(len(self.atoms) * 1e-10):
            self.p /= (p_size / np.sqrt(len(self.atoms) * 1e-10))
        g = g.ravel()
        r = r.ravel()
        ls = LineSearch()
        self.alpha_k, e, self.e0, self.no_update = \
            ls._line_search(self.func, self.fprime, r, self.p, g, e,
                            self.e0, maxstep=self.maxstep, c1=.23,
                            c2=.46, stpmax=50.)

class LBFGSLineSearch(LBFGS):
    """This optimizer uses the LBFGS algorithm, but does a line search
    that fulfills the Wolfe conditions.
    """

    def __init__(self, *args, **kwargs):
        kwargs['use_line_search'] = True
        LBFGS.__init__(self, *args, **kwargs)
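
# LBFGSLineSearch is a drop-in replacement for LBFGS and accepts the same
# arguments, since everything is forwarded to LBFGS.__init__, e.g.:
#
#     opt = LBFGSLineSearch(atoms, trajectory='n2.traj')
#     opt.run(fmax=0.05)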

# """Modified version of LBFGS.
#
# This optimizer uses the LBFGS algorithm, but does a line search for the
# minimum along the search direction.  This is done by issuing an
# additional force call for each step, thus doubling the number of
# calculations.
#
# Additionally the Hessian is reset if the new guess is not sufficiently
# better than the old one.
# """
# def __init__(self, *args, **kwargs):
#     self.dR = kwargs.pop('dR', 0.1)
#     LBFGS.__init__(self, *args, **kwargs)
#
# def update(self, r, f, r0, f0):
#     """Update everything that is kept in memory
#
#     This function is mostly here to allow for replay_trajectory.
#     """
#     if self.iteration > 0:
#         a1 = abs(np.dot(f.reshape(-1), f0.reshape(-1)))
#         a2 = np.dot(f0.reshape(-1), f0.reshape(-1))
#         if not (a1 <= 0.5 * a2 and a2 != 0):
#             # Reset optimization
#             self.initialize()
#
#     # Note that the reset above will set self.iteration to 0 again
#     # which is why we should check again
#     if self.iteration > 0:
#         s0 = r.reshape(-1) - r0.reshape(-1)
#         self.s.append(s0)
#
#         # We use the gradient which is minus the force!
#         y0 = f0.reshape(-1) - f.reshape(-1)
#         self.y.append(y0)
#
#         rho0 = 1.0 / np.dot(y0, s0)
#         self.rho.append(rho0)
#
#     if self.iteration > self.memory:
#         self.s.pop(0)
#         self.y.pop(0)
#         self.rho.pop(0)
#
# def determine_step(self, dr):
#     f = self.atoms.get_forces()
#
#     # Unit-vector along the search direction
#     du = dr / np.sqrt(np.dot(dr.reshape(-1), dr.reshape(-1)))
#
#     # We keep the old step determination before we figure
#     # out what is the best to do.
#     maxstep = self.maxstep * np.sqrt(3 * len(self.atoms))
#
#     # Finite difference step using temporary point
#     self.atoms.positions += (du * self.dR)
#     # Decide how much to move along the line du
#     Fp1 = np.dot(f.reshape(-1), du.reshape(-1))
#     Fp2 = np.dot(self.atoms.get_forces().reshape(-1), du.reshape(-1))
#     CR = (Fp1 - Fp2) / self.dR
#     #RdR = Fp1*0.1
#     if CR < 0.0:
#         #print "negcurve"
#         RdR = maxstep
#         #if(abs(RdR) > maxstep):
#         #    RdR = self.sign(RdR) * maxstep
#     else:
#         Fp = (Fp1 + Fp2) * 0.5
#         RdR = Fp / CR
#         if abs(RdR) > maxstep:
#             RdR = np.sign(RdR) * maxstep
#         else:
#             RdR += self.dR * 0.5
#     return du * RdR

class HessLBFGS(LBFGS):
    """Backwards compatibility class."""
    def __init__(self, *args, **kwargs):
        if 'method' in kwargs:
            del kwargs['method']
        sys.stderr.write('Please use LBFGS instead of HessLBFGS!')
        LBFGS.__init__(self, *args, **kwargs)

class LineLBFGS(LBFGSLineSearch):
    """Backwards compatibility class."""
    def __init__(self, *args, **kwargs):
        if 'method' in kwargs:
            del kwargs['method']
        sys.stderr.write('Please use LBFGSLineSearch instead of LineLBFGS!')
        LBFGSLineSearch.__init__(self, *args, **kwargs)