Révision 46

database/import_gaussian_calc (revision 46)
2 2
# $Id$
3 3
import os, sys, argparse
4 4
from ase import Atoms
5
from ase.io import read
5
from ase.io.gaussian import read_gaussian_out
6 6
from ase.db import connect
7 7
from ase.calculators.gaussian import Gaussian
8
from ase.calculators.emt import EMT
8 9
from ase.constraints import FixAtoms
9 10
from numpy.linalg import norm
10 11
from bz2 import BZ2File
......
35 36
        # It was fine, give a normal answer
36 37
        return result
37 38

  
38
def import_gaussian_calculations(path,db,include_dir=[],exclude_dir=[],use_input=False,creator=os.environ['USER'], nproc=8):
39
def import_gaussian_calculations(path,db,include_dir=[],exclude_dir=[],use_input=False,project='gaussian',user=os.environ['USER'], nproc=8):
39 40
    #log = open('scan.log','w')
40 41
    _all = os.walk(os.path.abspath(path))
41 42
    calculator=None
......
44 45
    filenames = []
45 46
    if os.path.exists(db):
46 47
        con = connect(db)
47
        for row in con.select(code='GAUSSIAN'):
48
        for row in con.select(project='test'):
48 49
            filenames.append(row.filename)
49 50
    else:
50 51
        con = connect(db)
......
57 58
        condition1 = (read_all_dir or a[0].split('/')[-1] in include_dir)
58 59
        #condition2 = not files[0].split('/')[-1] in exclude_dir
59 60
        condition3 = [ i for i, x in enumerate(exclude_dir) if x in a[0] ] 
60
        _gass_out = [i for i in range(len(a[2])) if ('.log' in a[2][i] and a[2][i][-4:]=='.log')]
61
        _gass_out = [i for i in range(len(a[2])) if ('.log' in a[2][i])]
61 62
        if condition1 and not condition3:
62 63
            for f in _gass_out:
63
                filetoread = a[2][f]
64 64
                path = a[0]
65
                filetoread = path +'/'+ a[2][f]
65 66
                if not filetoread in filenames:
66 67
                    all_files.append(filetoread)
67 68
                    paths.append(path)
......
69 70
    multiprocessing.log_to_stderr()
70 71
    print 'using '+str(nproc)+' threads'
71 72
    pool = multiprocessing.Pool(nproc)
72
    results = []
73 73
    for filetoread,path in zip(all_files, paths):
74
        #result = pool.apply_async(LogExceptions(mp_worker), args=(con,files,use_input,filenames,xc_dict), callback=log_result)
75
        pool.apply_async(LogExceptions(mp_worker), args=(con,filetoread,path,use_input,filenames,xc_dict), callback=log_result)
74
        #def callback(result, func=LogExceptions(mp_worker)):
75
        #    results[func] = result
76
        result = pool.apply_async(LogExceptions(mp_worker), args=(con,files,use_input,filenames,xc_dict), callback=log_result)
76 77
    pool.close()
77 78
    pool.join()
78 79
    t3 = time()
80
    con = connect(db)
79 81
    print 'total time used: ', t3-t2, ' seconds'
80 82

  
81 83
def log_result(log):
82 84
    logfile = open('scan.log','a')
83 85
    print >> logfile, log
86
    return log
84 87

  
85
def mp_worker(con, filetoread, path, use_input,filenames,xc_dict):
88
def mp_worker(con, filetoread, path, use_input,filenames, user, project):
86 89
    try:
87 90
        t1 = time()
88
        filetoread = path +'/'+ filetoread
89
        Atoms = read(filetoread)
91
        Atoms, data = read_gaussian_out(filetoread,quantity='all')
90 92
        t2 = time()
91 93
        print 'timing: ', t2-t1
92
        if use_input:
93
            pass
94
            #constraints = read(path+'/input').constraints
95
            #Atoms.set_constraint(constraints)
96
            #print 'get constraint from input', filetoread
97
        else:
98
            Atoms0 = read(filetoread, index=0)
99
            pos0 = Atoms0.positions
100
            pos = Atoms.positions
101
            diff = pos - pos0
102
            disp = norm(diff, axis=1)
103
            if disp.sum() > 0:
104
                print 'find constraint from outcar'
105
                mask = [_disp == 0 for _disp in disp]
106
                constraints = FixAtoms(mask=mask)
107
                Atoms.set_constraint(constraints)
108
            else:
109
                print 'no constraint'
110
                constraints = None
111 94

  
112
        #read version number, XC, ENCUT etc.
113
        if filetoread[-4:] == '.bz2':
114
            fobj = BZ2File(filetoread)
115
        elif filetoread[-3:] == '.gz':
116
            fobj = GzipFile(filetoread)
117
        else:
118
            fobj = open(filetoread)
119
        with fobj as outcar:
120
            version = outcar.readline()
121
            line = outcar.readline()
122
            read_para = 0
123
            lsol = False
124
            ldipol = False
125
            while line != '':
126
                if line.startswith(' INCAR:'):
127
                    line = outcar.readline().split()
128
                    if line[1] == 'PAW_PBE':
129
                        pot = 'PAW'
130
                    else:
131
                        pot = line[1]
132
                elif line.startswith(' Dimension of arrays'):
133
                #if line.startswith(' Dimension of arrays'):
134
                    read_para = 1
135
                elif read_para:
136
                    if 'LEXCH' in line:
137
                        xc_flag = line.split()[2].upper()
138
                        if xc_flag not in xc_dict.keys():
139
                            raise ValueError('Unknown xc-functional flag found in POTCAR,'
140
                                             ' LEXCH=%s' % xc_flag)
141
                        xc = xc_dict[xc_flag]
142
                    elif 'NKPTS' in line:
143
                    #if 'NKPTS' in line:
144
                        nkpts = int(line.split()[3])
145
                    elif 'ENCUT' in line:
146
                        encut = float(line.split()[2])
147
                    elif 'ENAUG' in line:
148
                        enaug = float(line.split()[2])
149
                    elif 'NBANDS' in line:
150
                        nbands = int(line.split()[-1])
151
                    elif 'EDIFFG' in line:
152
                        f_limit = -float(line.split()[2])
153
                    elif 'LSOL' in line:
154
                        if line.split()[2] == 'T':
155
                            lsol = True
156
                    elif 'LDIPOL' in line:
157
                        if line.split()[2] == 'T':
158
                            ldipol = True
159
                    elif '-'*104 in line:
160
                        break
161
                line = outcar.readline()
162
        if constraints is not None:
163
            fmax = ((Atoms.get_forces()) ** 2).sum(1).max()**0.5
164
            if fmax < f_limit:
165
                #print >> log, 'updating the database with minimized calculation from file: ', fmax, files[2][f]
166
                _log = 'archived_a: '+filetoread+' '+str(fmax)
167
                #if use_input:
168
                #print os.getenv('USER')
169
                id = con.reserve(filename=filetoread)
170
                con.write(Atoms,functional = xc,path=path,code='GAUSSIAN',filename=filetoread, version=version, potential=pot, encut=encut, enaug=enaug, lsol=lsol, ldipol=ldipol, nkpts=nkpts, constraint=True)
171
                del con[id]
172
            else:
173
                #print >> log, 'not updating the database as fmax is too big for file: ', fmax, files[2][f]
174
                _log = 'skipped: '+filetoread+' '+str(fmax)
175
        else:
176
            #print >> log, 'updating the database with calculation', files[2][f]
177
            _log = 'archived_b: '+filetoread
178
            id = con.reserve(filename=filetoread)
179
            con.write(Atoms,functional = xc,creator=creator,path=path,code='GAUSSIAN',filename=filetoread, version=version, potential=pot, encut=encut, enaug=enaug, lsol=lsol, ldipol=ldipol, nkpts=nkpts, constraint=False)
180
            del con[id]
95
        _log = 'archived_a: '+filetoread
96
        id = con.reserve(filename=filetoread)
97
        con.write(Atoms,functional = data['Method'],charge=data['Charge'],basis_set=data['Basis_set'],path=path,filename=filetoread, version=data['Version'],project='test')
98
        del con[id]
99
        record = con.select(filename=filetoread)
100
#==================================
101
#This part will only exist temporarily, it is for importing other people's calculation
102
#For update the user and calculator keys, tjiang's hacked copy of ase is needed, 
103
#as these two are reserved keys that are not allowed to be updated manually
104
        id = next(record)['id'] 
105
        con.update(id,user='edumont')
106
#==================================
107
        con.update(id,calculator='gaussian')
108
        print _log 
181 109
    except (IndexError, ValueError):
182 110
        _log = 'failed: '+filetoread
183
    print _log 
184
    #_log.flush()
111
        print _log 
112
    ##_log.flush()
185 113
    return
186 114

  
187 115
if __name__ == '__main__':
......
192 120
    parser.add_argument("-e", "--exclude_dir",help="Directory to be excluded in the scanning")
193 121
    parser.add_argument("-d", help="Name of the database file to store the scanned calculations")
194 122
    parser.add_argument("-n", help="Number of processers")
123
    parser.add_argument("-j", help="Project name")
195 124
    args = parser.parse_args()
196 125
    if args.path is None:
197 126
        path = '.'
......
217 146
        nproc = 8
218 147
    else:
219 148
        nproc = int(args.n)
149
    if args.j is None:
150
        project = 'gaussian'
151
    else:
152
        project = int(args.j)
220 153

  
221
    import_gaussian_calculations(path, db, use_input=use_input, include_dir=include_dir, exclude_dir=exclude_dir, nproc=nproc)
154
    import_gaussian_calculations(path, db, use_input=use_input, include_dir=include_dir, exclude_dir=exclude_dir, project=project, nproc=nproc)

Formats disponibles : Unified diff