Statistiques
| Révision :

chimie4psmn / database / import_gaussian_calc @ 47

Historique | Voir | Annoter | Télécharger (5,57 ko)

1
#!/usr/bin/env python
2
# $Id$
3
import os, sys, argparse
4
from ase import Atoms
5
from ase.io.gaussian import read_gaussian_out
6
from ase.db import connect
7
from ase.calculators.gaussian import Gaussian
8
from ase.calculators.emt import EMT
9
from ase.constraints import FixAtoms
10
from numpy.linalg import norm
11
from bz2 import BZ2File
12
from gzip import GzipFile
13
from time import time
14
import multiprocessing
15
import traceback
16

    
17
def error(msg, *args):
    """Emit ``msg`` at ERROR level through the multiprocessing logger.

    ``args`` are forwarded to the logger as %-format arguments, and
    whatever the logger call returns is handed back to the caller.
    """
    mp_logger = multiprocessing.get_logger()
    return mp_logger.error(msg, *args)
19

    
20
class LogExceptions(object):
    """Wrap a callable so that any exception it raises is logged first.

    Calling an instance forwards all positional and keyword arguments to
    the wrapped callable and returns its result. If the callable raises
    an ``Exception``, the formatted traceback is sent to ``error`` and
    the same exception is then re-raised.
    """

    def __init__(self, callable):
        # The parameter name is part of the constructor's signature, so it
        # is kept even though it shadows the builtin.
        self.__callable = callable

    def __call__(self, *args, **kwargs):
        try:
            # Success path: hand the wrapped callable's answer straight back.
            return self.__callable(*args, **kwargs)
        except Exception:
            # Debugging aid: record the traceback through the
            # multiprocessing logger before the exception leaves here.
            error(traceback.format_exc())
            # Re-raise the original exception so the Pool worker can
            # clean up.
            raise
38

    
39
def import_gaussian_calculations(path, db, include_dir=None, exclude_dir=None,
                                 use_input=False, project='gaussian',
                                 user=None, nproc=8):
    """Scan ``path`` for Gaussian log files and archive the new ones in ``db``.

    The directory tree under ``path`` is walked; every file whose name
    contains ``'.log'`` and whose full name is not already recorded in the
    database is handed to ``mp_worker`` through a ``multiprocessing.Pool``.

    Parameters:
        path: root directory of the scan (made absolute before walking).
        db: name of the database file, passed to ``connect``.
        include_dir: directory base names to scan; empty/None scans all.
        exclude_dir: substrings; a directory whose path contains any of
            them is skipped.
        use_input: forwarded unchanged to ``mp_worker``.
        project: forwarded unchanged to ``mp_worker``.
        user: forwarded to ``mp_worker``; when None, ``os.environ['USER']``
            is read at call time (KeyError if it is not set).
        nproc: number of worker processes in the pool.

    A single directory name given as a plain string for ``include_dir`` or
    ``exclude_dir`` is treated as a one-element list; iterating the string
    itself would test its individual characters against every path.
    """
    if include_dir is None:
        include_dir = []
    elif isinstance(include_dir, str):
        include_dir = [include_dir]
    if exclude_dir is None:
        exclude_dir = []
    elif isinstance(exclude_dir, str):
        exclude_dir = [exclude_dir]
    if user is None:
        # Resolved lazily so that merely importing this module does not
        # require USER to be set.
        user = os.environ['USER']

    # Existence is checked before connecting, as in the original flow: the
    # already-archived file names are only queried from a pre-existing file.
    db_exists = os.path.exists(db)
    con = connect(db)
    filenames = []
    if db_exists:
        # NOTE(review): the lookup uses the hard-coded project 'test', the
        # same value mp_worker stores, not the ``project`` argument.
        for row in con.select(project='test'):
            filenames.append(row.filename)
    known = set(filenames)  # O(1) membership tests during the scan

    read_all_dir = not include_dir
    all_files = []
    paths = []
    for dirpath, _subdirs, names in os.walk(os.path.abspath(path)):
        included = read_all_dir or dirpath.split('/')[-1] in include_dir
        excluded = any(pattern in dirpath for pattern in exclude_dir)
        if not included or excluded:
            continue
        for name in names:
            if '.log' not in name:
                continue
            # Plain '/' concatenation is kept so the stored file names stay
            # comparable with the ones already in the database.
            filetoread = dirpath + '/' + name
            if filetoread not in known:
                all_files.append(filetoread)
                paths.append(dirpath)

    t_start = time()
    multiprocessing.log_to_stderr()
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 function.
    print('using ' + str(nproc) + ' threads')
    pool = multiprocessing.Pool(nproc)
    try:
        worker = LogExceptions(mp_worker)
        for filetoread, logdir in zip(all_files, paths):
            # The argument tuple follows mp_worker's parameter order.
            pool.apply_async(
                worker,
                args=(con, filetoread, logdir, use_input, filenames,
                      user, project),
                callback=log_result)
    finally:
        # Always stop accepting work and wait for the workers, even if
        # submitting a task failed.
        pool.close()
        pool.join()
    t_end = time()
    print('total time used:  %s  seconds' % (t_end - t_start))
82

    
83
def log_result(log):
    """Append ``log`` as one line to ``scan.log`` and return it unchanged.

    The file is opened in append mode in the current working directory and
    is closed again before returning.
    """
    with open('scan.log', 'a') as logfile:
        # Same text a ``print`` to the file would produce: str(log) + newline.
        logfile.write('%s\n' % (log,))
    return log
87

    
88
def mp_worker(con, filetoread, path, use_input, filenames, user, project):
    """Read one Gaussian log file and store it in the database ``con``.

    Parameters:
        con: database connection used for reserve/write/select/update.
        filetoread: full name of the Gaussian log file to parse.
        path: directory of the file, stored with the row.
        use_input, filenames, user, project: accepted for the caller's
            argument list but not used by the current implementation.

    Returns:
        The status line that was printed ('archived_a: ...', 'failed: ...'
        or 'skipped (already in database): ...'), so that a pool callback
        receives something meaningful to log.

    ``IndexError`` and ``ValueError`` are reported as a failed file; any
    other exception propagates to the caller.
    """
    try:
        t1 = time()
        atoms, data = read_gaussian_out(filetoread, quantity='all')
        t2 = time()
        # Single-argument print(...) gives the same output under a Python 2
        # print statement and the Python 3 function.
        print('timing:  %s' % (t2 - t1))

        _log = 'archived_a: ' + filetoread
        reserved_id = con.reserve(filename=filetoread)
        if reserved_id is None:
            # NOTE(review): per the ase.db documentation reserve() returns
            # None when a matching row already exists; writing again would
            # duplicate it - confirm against the ase version in use.
            _log = 'skipped (already in database): ' + filetoread
            print(_log)
            return _log
        try:
            # NOTE(review): project is hard-coded to 'test' rather than
            # taken from the ``project`` argument.
            con.write(atoms, functional=data['Method'], charge=data['Charge'],
                      basis_set=data['Basis_set'], path=path,
                      filename=filetoread, version=data['Version'],
                      project='test')
        finally:
            # Drop the placeholder row even when the write fails, so no
            # empty reservation is left behind.
            del con[reserved_id]
        record = con.select(filename=filetoread)
#==================================
#This part will only exist temporarily, it is for importing other people's calculation
#To update the user and calculator keys, tjiang's hacked copy of ase is needed,
#as these two are reserved keys that are not allowed to be updated manually
        row_id = next(record)['id']
        con.update(row_id, user='edumont')
#==================================
        con.update(row_id, calculator='gaussian')
        print(_log)
    except (IndexError, ValueError):
        _log = 'failed: ' + filetoread
        print(_log)
    return _log
114

    
115
if __name__ == '__main__':
116
    parser = argparse.ArgumentParser()
117
    parser.add_argument("path", help="The path under which the calculations will be scanned")
118
    parser.add_argument("-p", help="Use input in the same directory to determine the constraint", action="store_true")
119
    parser.add_argument("-i", "--include_dir",help="Directory to be included in the scanning")
120
    parser.add_argument("-e", "--exclude_dir",help="Directory to be excluded in the scanning")
121
    parser.add_argument("-d", help="Name of the database file to store the scanned calculations")
122
    parser.add_argument("-n", help="Number of processers")
123
    parser.add_argument("-u", help="The user who did the calculations")
124
    parser.add_argument("-j", help="Project name")
125
    args = parser.parse_args()
126
    if args.path is None:
127
        path = '.'
128
    else:
129
        path = args.path
130
    if args.p:
131
        use_input = True
132
    else:
133
        use_input = False
134
    if args.include_dir is None:
135
        include_dir = []
136
    else:
137
        include_dir = args.include_dir
138
    if args.exclude_dir is None:
139
        exclude_dir = []
140
    else:
141
        exclude_dir = args.exclude_dir
142
    if args.d is None:
143
        db = 'gaussian.db'
144
    else:
145
        db = args.d
146
    if args.n is None:
147
        nproc = 8
148
    else:
149
        nproc = int(args.n)
150
    if args.u is None:
151
        user = os.environ['USER']
152
    else:
153
        user = args.u
154
    if args.j is None:
155
        project = 'gaussian'
156
    else:
157
        project = int(args.j)
158

    
159
    import_gaussian_calculations(path, db, use_input=use_input, include_dir=include_dir, exclude_dir=exclude_dir, project=project, nproc=nproc,user=user)