Statistiques
| Révision :

chimie4psmn / database / import_gaussian_calc

Historique | Voir | Annoter | Télécharger (5,62 ko)

1
#!/usr/bin/env python
2
# $Id$
3
import os, sys, argparse
4
from ase import Atoms
5
from ase.io.gaussian import read_gaussian_out
6
from ase.db import connect
7
from ase.calculators.gaussian import Gaussian
8
from ase.calculators.emt import EMT
9
from ase.constraints import FixAtoms
10
from numpy.linalg import norm
11
from bz2 import BZ2File
12
from gzip import GzipFile
13
from time import time
14
import multiprocessing
15
import traceback
16

    
17
def error(msg, *args):
    """Log ``msg`` at ERROR level on the multiprocessing logger.

    ``args`` are forwarded unchanged as the logger's %-style format
    arguments.  Returns whatever ``Logger.error`` returns.
    """
    mp_logger = multiprocessing.get_logger()
    return mp_logger.error(msg, *args)
19

    
20
class LogExceptions(object):
    """Wrap a callable so that any exception it raises is logged, then re-raised.

    The wrapped callable is invoked with exactly the arguments given to the
    wrapper, and its return value is passed back unchanged.
    """

    def __init__(self, callable):
        # The parameter name shadows the builtin but is kept so keyword
        # callers (``LogExceptions(callable=f)``) keep working.
        self.__callable = callable

    def __call__(self, *args, **kwargs):
        try:
            # It was fine, give a normal answer.
            return self.__callable(*args, **kwargs)
        except Exception:
            # Here we add some debugging help: send the formatted traceback
            # to the multiprocessing logger via the module-level error().
            error(traceback.format_exc())
            # Re-raise the original exception so the Pool worker can
            # clean up.
            raise
38

    
39
def _as_name_list(names):
    """Return ``names`` as a list: ``None`` -> ``[]``, a bare string -> ``[names]``."""
    if names is None:
        return []
    if isinstance(names, str):
        # A single directory name (e.g. straight from argparse) must not be
        # iterated character by character.
        return [names]
    return list(names)


def import_gaussian_calculations(path, db, include_dir=None, exclude_dir=None,
                                 use_input=False, project='gaussian',
                                 user=None, nproc=8):
    """Scan ``path`` for Gaussian ``.log`` files and archive the new ones in ``db``.

    Parameters:
        path: root directory; it is walked recursively.
        db: database file name handed to ``connect``.
        include_dir: directory name(s); when non-empty, only directories
            whose last path component is listed are scanned.  ``None`` or
            empty means scan every directory.
        exclude_dir: string(s); a directory is skipped when any of them
            occurs as a substring of its absolute path.
        use_input, user, project: forwarded unchanged to ``mp_worker``.
            ``user`` defaults to ``os.environ['USER']``, looked up at call
            time.
        nproc: number of pool worker processes (clamped to at least 1).

    Every file whose name contains ``'.log'`` and whose full path is not
    already recorded in the database is submitted to a
    ``multiprocessing.Pool``; each job runs ``mp_worker`` wrapped in
    ``LogExceptions`` with ``log_result`` as the completion callback.
    Returns ``None``.
    """
    include_dir = _as_name_list(include_dir)
    exclude_dir = _as_name_list(exclude_dir)
    if user is None:
        user = os.environ['USER']
    nproc = max(1, int(nproc))

    # Collect the file names that are already archived so they are skipped.
    db_existed = os.path.exists(db)
    con = connect(db)
    known_files = []
    if db_existed:
        # NOTE(review): the tag is hard-coded to 'test' (not ``project``)
        # because mp_worker writes its rows with project='test'; change both
        # together or the duplicate check stops working.
        known_files = [row.filename for row in con.select(project='test')]
    known = set(known_files)  # O(1) membership tests during the scan

    read_all_dir = not include_dir
    jobs = []  # (full file name, containing directory) pairs
    for dirpath, _dirnames, names in os.walk(os.path.abspath(path)):
        if not (read_all_dir or dirpath.split('/')[-1] in include_dir):
            continue
        if any(excluded in dirpath for excluded in exclude_dir):
            continue
        for name in names:
            if '.log' not in name:
                continue
            # Plain '/' concatenation is kept so new entries compare equal
            # to the file names already stored in the database.
            filetoread = dirpath + '/' + name
            if filetoread not in known:
                jobs.append((filetoread, dirpath))

    scan_done = time()
    multiprocessing.log_to_stderr()
    print('using ' + str(nproc) + ' threads')
    pool = multiprocessing.Pool(nproc)
    try:
        for filetoread, dirpath in jobs:
            # The argument tuple follows mp_worker's signature:
            # (con, filetoread, path, use_input, filenames, user, project).
            pool.apply_async(
                LogExceptions(mp_worker),
                args=(con, filetoread, dirpath, use_input, known_files,
                      user, project),
                callback=log_result)
    finally:
        # close() has to precede join(); running both here keeps worker
        # processes from being left behind if a submission raises.
        pool.close()
        pool.join()
    pool_done = time()
    print('total time used:  ' + str(pool_done - scan_done) + '  seconds')
82

    
83
def log_result(log):
    """Append ``log`` as one line to ``scan.log`` in the current directory.

    ``log`` is converted with ``%s`` formatting, so any object is accepted.
    Returns ``log`` unchanged.
    """
    # The context manager closes the handle on every call; the function runs
    # once per finished job, so an unclosed handle would accumulate.
    with open('scan.log', 'a') as logfile:
        logfile.write('%s\n' % (log,))
    return log
87

    
88
def mp_worker(con, filetoread, path, use_input, filenames, user, project):
    """Parse one Gaussian output file and archive it through ``con``.

    Parameters:
        con: database connection; ``reserve``, ``write``, ``select``,
            ``update`` and item deletion are used on it.
        filetoread: full name of the Gaussian output file.
        path: directory containing the file; stored as the ``path`` key.
        use_input, filenames, user, project: accepted for the caller's
            argument tuple but not used by this body.

    Returns the status line that was printed: ``'archived_a: <file>'`` on
    success, ``'failed: <file>'`` when parsing or archiving raises
    ``IndexError`` or ``ValueError``, or ``'skipped: <file>'`` when a row
    for this file name already exists.  Any other exception propagates.
    """
    try:
        started = time()
        atoms, data = read_gaussian_out(filetoread, quantity='all')
        print('timing:  ' + str(time() - started))

        _log = 'archived_a: ' + filetoread
        reserved_id = con.reserve(filename=filetoread)
        if reserved_id is None:
            # reserve() reports None when a row with this file name already
            # exists; writing again would duplicate it and there is no
            # placeholder row to delete.
            _log = 'skipped: ' + filetoread
            print(_log)
            return _log
        try:
            # NOTE(review): project is hard-coded to 'test' rather than the
            # ``project`` argument; the importer's duplicate check selects
            # on the same tag, so change both together.
            con.write(atoms, functional=data['Method'], charge=data['Charge'],
                      basis_set=data['Basis_set'], path=path,
                      filename=filetoread, version=data['Version'],
                      project='test')
        finally:
            # Drop the placeholder row even when the write fails, so a
            # stale reservation cannot block a later import of this file.
            del con[reserved_id]
        record = con.select(filename=filetoread)
        # ==================================
        # This part is temporary: it is for importing other people's
        # calculations.  Updating the user and calculator keys requires
        # tjiang's hacked copy of ase, because these two are reserved keys
        # that cannot normally be updated manually.
        row_id = next(record)['id']
        # NOTE(review): the user is hard-coded; the ``user`` argument is
        # ignored here.
        con.update(row_id, user='edumont')
        # ==================================
        con.update(row_id, calculator='gaussian')
        print(_log)
    except (IndexError, ValueError):
        _log = 'failed: ' + filetoread
        print(_log)
    # Hand the status line back so the pool callback can record it.
    return _log
114

    
115
if __name__ == '__main__':
    # Command-line entry point: parse the options, fill in defaults and run
    # the importer once.
    parser = argparse.ArgumentParser()
    parser.add_argument("path", help="The path under which the calculations will be scanned")
    parser.add_argument("-p", help="Use input in the same directory to determine the constraint", action="store_true")
    parser.add_argument("-i", "--include_dir",help="Directory to be included in the scanning")
    parser.add_argument("-e", "--exclude_dir",help="Directory to be excluded in the scanning")
    parser.add_argument("-d", help="Name of the database file to store the scanned calculations")
    parser.add_argument("-n", help="Number of processers")
    parser.add_argument("-u", help="The user who did the calculations")
    parser.add_argument("-j", help="Project name")
    args = parser.parse_args()

    # "path" is a required positional, so argparse always supplies a value.
    path = args.path
    # store_true already yields a bool.
    use_input = args.p

    # -i / -e each carry a single directory name; wrap it in a list so the
    # importer matches whole names instead of iterating over characters.
    include_dir = [] if args.include_dir is None else [args.include_dir]
    exclude_dir = [] if args.exclude_dir is None else [args.exclude_dir]

    db = 'gaussian.db' if args.d is None else args.d

    if args.n is None:
        # Default to half of the available CPUs, but never fewer than one
        # worker (integer division gives 0 on a single-CPU machine).
        nproc = max(1, multiprocessing.cpu_count() // 2)
    else:
        nproc = int(args.n)

    user = os.environ['USER'] if args.u is None else args.u

    # The project is a name, not a number: pass the string through as-is.
    project = 'gaussian' if args.j is None else args.j

    import_gaussian_calculations(path, db, use_input=use_input, include_dir=include_dir, exclude_dir=exclude_dir, project=project, nproc=nproc,user=user)