Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / calculation.py @ 1d8c374e

Historique | Voir | Annoter | Télécharger (15,08 ko)

1
import os
2
import logging
3

    
4
logger = logging.getLogger('DockOnSurf')
5

    
6

    
7
def check_finished_calcs(run_type, code):
    """Returns two lists of calculations finished normally and abnormally.

    @param run_type: The type of calculation to check.
    @param code: The code used for the specified job.
    @return finished_calcs: List of calculation directories that have finished
        normally.
    @return unfinished_calcs: List of calculation directories that have
        finished abnormally.
    @raise NotImplementedError: if no check is implemented for 'code'.
    """
    # NOTE: imports must come after the docstring, otherwise the string above
    # is a plain expression statement and the function has no docstring.
    from glob import glob
    import ase.io
    from modules.utilities import tail, is_binary, _human_key

    finished_calcs = []
    unfinished_calcs = []
    for conf_dir in sorted(os.listdir(run_type), key=_human_key):
        conf_path = f'{run_type}/{conf_dir}/'
        # Only 'conf_*' sub-directories are calculation folders.
        if not os.path.isdir(conf_path) or 'conf_' not in conf_dir:
            continue
        if code == 'cp2k':
            # Exactly one *-1.restart file marks a CP2K run that got underway.
            restart_file_list = glob(f"{conf_path}/*-1.restart")
            if len(restart_file_list) == 0:
                logger.warning(f"No *-1.restart file found on {conf_path}.")
                unfinished_calcs.append(conf_dir)
                continue
            elif len(restart_file_list) > 1:
                warn_msg = f'There is more than one CP2K restart file ' \
                           f'(*-1.restart) in {conf_path}: ' \
                           f'{restart_file_list}. Skipping directory.'
                unfinished_calcs.append(conf_dir)
                logger.warning(warn_msg)
                continue
            # A normally-finished CP2K output ends with "PROGRAM STOPPED IN".
            out_files = []
            for file in os.listdir(conf_path):
                if is_binary(conf_path+file):
                    continue
                with open(conf_path+file, "rb") as out_fh:
                    tail_out_str = tail(out_fh)
                if tail_out_str.count("PROGRAM STOPPED IN") == 1:
                    out_files.append(file)
            if len(out_files) > 1:
                warn_msg = f'There is more than one CP2K output file in ' \
                           f'{conf_path}: {out_files}. Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            elif len(out_files) == 0:
                warn_msg = f'There is no CP2K output file in {conf_path}. ' \
                           'Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            else:
                finished_calcs.append(conf_dir)
        elif code == 'vasp':
            out_file_list = glob(f"{conf_path}/OUTCAR")
            if len(out_file_list) == 0:
                unfinished_calcs.append(conf_dir)
            elif len(out_file_list) > 1:
                warn_msg = f'There is more than one file matching the {code} ' \
                           f'pattern for finished calculation (OUTCAR) ' \
                           f'in {conf_path}: ' \
                           f'{out_file_list}. Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            else:
                # An OUTCAR that ase cannot parse means the run died early.
                try:
                    ase.io.read(f"{conf_path}/OUTCAR")
                except (ValueError, IndexError):
                    unfinished_calcs.append(conf_dir)
                    continue
                # VASP appends the timing footer only on a clean finish.
                with open(f"{conf_path}/OUTCAR", 'rb') as out_fh:
                    if "General timing and accounting" not in tail(out_fh):
                        unfinished_calcs.append(conf_dir)
                    else:
                        finished_calcs.append(conf_dir)
        else:
            err_msg = f"Check not implemented for '{code}'."
            logger.error(err_msg)
            raise NotImplementedError(err_msg)
    return finished_calcs, unfinished_calcs
91

    
92

    
93
def prep_cp2k(inp_file: str, run_type: str, atms_list: list, proj_name: str):
    """Sets up one directory per configuration for a CP2K run.

    The CP2K input is parsed, the project name is set, and a coordinate file
    name is filled in when the template does not provide one. Every atoms
    object then gets its own 'conf_<i>' sub-directory holding a copy of the
    input file and the coordinates.

    @param inp_file: CP2K Input file to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project
    @return: None
    @raise ValueError: if 'inp_file' is not a string.
    """
    from shutil import copy
    from pycp2k import CP2K
    from modules.utilities import check_bak

    if not isinstance(inp_file, str):
        err_msg = "'inp_file' must be a string with the path of the CP2K " \
                  "input file."
        logger.error(err_msg)
        raise ValueError(err_msg)

    inp_basename = inp_file.split('/')[-1]
    cp2k = CP2K()
    cp2k.parse(inp_file)
    cp2k.CP2K_INPUT.GLOBAL.Project_name = proj_name + "_" + run_type
    topology = cp2k.CP2K_INPUT.FORCE_EVAL_list[0].SUBSYS.TOPOLOGY
    if topology.Coord_file_name is None:
        logger.warning("'COORD_FILE_NAME' not specified on CP2K input. Using\n"
                       "'coord.xyz'. A new CP2K input file with "
                       "the 'COORD_FILE_NAME' variable is created.")
        topology.Coord_file_name = 'coord.xyz'
        # Back up any pre-existing file before rewriting the input.
        check_bak(inp_basename)
    cp2k.write_input_file(inp_basename)

    coord_file = topology.Coord_file_name

    # One sub-directory per configuration: input file + coordinates.
    for idx, atoms in enumerate(atms_list):
        conf_dir = f'{run_type}/conf_{idx}/'
        os.mkdir(conf_dir)
        copy(inp_file, conf_dir)
        atoms.write(conf_dir + coord_file)
131

    
132

    
133
def prep_vasp(inp_files, run_type, atms_list, proj_name, cell, potcar_dir):
    """Prepares the directories to run calculations with VASP.

    @param inp_files: VASP Input files to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project.
    @param cell: Cell for the Periodic Boundary Conditions.
    @param potcar_dir: Directory to find POTCARs for each element.
    @return: None
    @raise ValueError: if 'inp_files' is not a list of strings, or no cell
        is defined.
    @raise FileNotFoundError: if a mandatory input file is missing.
    """
    from shutil import copy

    import numpy as np

    # POTCAR is only mandatory when it cannot be assembled from potcar_dir.
    if not potcar_dir:
        mand_files = ["INCAR", "KPOINTS", "POTCAR"]
    elif any("POTCAR" in inp_file for inp_file in inp_files):
        mand_files = ["INCAR", "KPOINTS", "POTCAR"]
    else:
        mand_files = ["INCAR", "KPOINTS"]

    # Check that 'inp_files' is a list of file names/paths.
    # (Fixed: the original condition required *not* a list AND all strings,
    # and was missing the 'raise', so bad arguments slipped through.)
    if not isinstance(inp_files, list) \
            or not all(isinstance(inp_file, str) for inp_file in inp_files):
        err_msg = "'inp_files' should be a list of file names/paths"
        logger.error(err_msg)
        raise ValueError(err_msg)
    # Check that all mandatory files are defined
    if any(not any(mand_file in inp_file.split("/")[-1]
                   for inp_file in inp_files) for mand_file in mand_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not specified."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)
    # Check that the defined files exist
    if any(not os.path.isfile(inp_file) for inp_file in inp_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not found."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)

    # Imported after validation so argument errors surface without pymatgen.
    from pymatgen.io.vasp.inputs import Incar

    incar = ""
    for inp_file in inp_files:
        if "INCAR" in inp_file.split("/")[-1]:
            incar = Incar.from_file(inp_file)
            incar["SYSTEM"] = proj_name + "_" + run_type

    for c, conf in enumerate(atms_list):
        subdir = f'{run_type}/conf_{c}/'
        os.mkdir(subdir)
        for inp_file in inp_files:
            file_name = inp_file.split("/")[-1]
            # Files whose name merely *contains* the canonical name are
            # renamed to it inside the configuration directory.
            if "INCAR" in file_name:
                incar.write_file(subdir + "INCAR")
            elif "KPOINTS" in file_name and "KPOINTS" != file_name:
                copy(inp_file, subdir + "KPOINTS")
            elif "POTCAR" in file_name and "POTCAR" != file_name:
                copy(inp_file, subdir + "POTCAR")
            else:
                copy(inp_file, subdir)
        if cell is not False and np.linalg.det(cell) != 0.0:
            conf.pbc = True
            conf.cell = cell
            conf.center()
        elif np.linalg.det(conf.cell) == 0:
            err_msg = "Cell is not defined"
            logger.error(err_msg)
            raise ValueError(err_msg)
        conf.write(subdir + "POSCAR", format="vasp")
        # Build POTCAR by concatenating the per-element POTCARs in the same
        # order as the symbols on the first POSCAR line.
        if "POTCAR" not in mand_files and potcar_dir:
            with open(subdir + "POSCAR", "r") as poscar_fh:
                grouped_symbols = poscar_fh.readline().split()
            with open(subdir + "POTCAR", "a") as potcar_fh:
                for symbol in grouped_symbols:
                    with open(f"{potcar_dir}/{symbol}/POTCAR", "r") as sym_fh:
                        potcar_fh.write(sym_fh.read())
217

    
218

    
219
def get_jobs_status(job_ids, stat_cmd, stat_dict):
    """Returns a list of job status for a list of job ids.

    Each job id is substituted into 'stat_cmd', the command is run through a
    shell, and its stripped output is compared against the patterns in
    'stat_dict' to classify the job.

    @param job_ids: list of all jobs to be checked their status.
    @param stat_cmd: Command to check job status.
    @param stat_dict: Dictionary with pairs of job status (r, p, f) and the
        pattern it matches in the output of the stat_cmd.
    @return: list of status for every job.
    """
    from subprocess import PIPE, Popen
    statuses = []
    for job_id in job_ids:
        proc = Popen(stat_cmd % job_id, shell=True, stdout=PIPE)
        stat_msg = proc.communicate()[0].decode('utf-8').strip()
        # Match against the known states in a fixed order; anything else is
        # reported but contributes no entry to the result.
        for label in ('r', 'p', 'f'):
            if stat_dict[label] == stat_msg:
                statuses.append(label)
                break
        else:
            logger.warning(f'Unrecognized job {job_id} status: {stat_msg}')
    return statuses
242

    
243

    
244
def submit_jobs(run_type, sub_cmd, sub_script, stat_cmd, stat_dict, max_jobs,
                name):
    """Submits jobs to a custom queuing system with the provided script.

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_cmd: Bash command used to submit jobs.
    @param sub_script: script for the job submission.
    @param stat_cmd: Bash command to check job status.
    @param stat_dict: Dictionary with pairs of job status: r, p, f (ie. running
        pending and finished) and the pattern it matches in the output of the
        stat_cmd.
    @param max_jobs: dict: Contains the maximum number of jobs to be both
        running, pending/queued and pending+running. When the relevant maximum
        is reached no jobs more are submitted.
    @param name: name of the project.
    """
    from shutil import copy
    from time import sleep
    from subprocess import PIPE, Popen
    from modules.utilities import _human_key

    subm_jobs = []
    init_dir = os.getcwd()
    for conf in sorted(os.listdir(run_type), key=_human_key):
        i = conf.split('_')[1]
        # Throttle submission while any limit (running, pending, or both
        # combined) is reached. Query the queue once per poll: the original
        # evaluated get_jobs_status up to six times per check, spawning one
        # subprocess per submitted job each time, and the repeated snapshots
        # could disagree with each other.
        while True:
            statuses = get_jobs_status(subm_jobs, stat_cmd, stat_dict)
            if (statuses.count("r") + statuses.count("p") < max_jobs['rp']
                    and statuses.count("r") < max_jobs['r']
                    and statuses.count("p") < max_jobs['p']):
                break
            sleep(30)
        copy(sub_script, f"{run_type}/{conf}")
        os.chdir(f"{run_type}/{conf}")
        job_name = f'{name[:5]}{run_type[:3].capitalize()}{i}'
        sub_order = sub_cmd % (job_name, sub_script)
        subm_msg = Popen(sub_order, shell=True, stdout=PIPE).communicate()[0]
        # The scheduler prints the job id somewhere in its reply; take the
        # first integer-looking token (stripping LSF-style '<id>' brackets).
        job_id = None
        for word in subm_msg.decode("utf-8").split():
            try:
                job_id = int(word.replace('>', '').replace('<', ''))
                break
            except ValueError:
                continue
        subm_jobs.append(job_id)
        os.chdir(init_dir)

    logger.info('All jobs have been submitted, waiting for them to finish.')
    # Block until every submitted job reports the 'finished' pattern.
    while not all(stat == 'f' for stat in
                  get_jobs_status(subm_jobs, stat_cmd, stat_dict)):
        sleep(30)
    logger.info('All jobs have finished.')
296

    
297

    
298
def run_calc(run_type, inp_vars, atms_list):
    """Directs the calculation run according to the provided arguments.

    Validates 'run_type', prepares the per-configuration directories for the
    selected code, and submits the jobs through the configured queuing system.

    @param run_type: Type of calculation. 'isolated', 'screening' or
    'refinement'
    @param inp_vars: Calculation parameters from input file.
    @param atms_list: List of ase.Atoms objects containing the sets of atoms
    aimed to run the calculations of.
    @raise ValueError: if 'run_type' or 'batch_q_sys' is unknown.
    """
    from modules.utilities import check_bak

    run_types = ['isolated', 'screening', 'refinement']
    if not isinstance(run_type, str) or run_type.lower() not in run_types:
        run_type_err = f"'run_type' must be one of the following: {run_types}"
        logger.error(run_type_err)
        raise ValueError(run_type_err)

    batch_q_sys = inp_vars['batch_q_sys']
    if batch_q_sys:
        logger.info(f"Running {run_type} calculation with {inp_vars['code']} on"
                    f" {batch_q_sys}.")
    else:
        logger.info(f"Doing a dry run of {run_type}.")
    check_bak(run_type)
    os.mkdir(run_type)

    # Prepare directories and files for relevant code.
    input_files = {'isolated': 'isol_inp_file', 'screening': 'screen_inp_file',
                   'refinement': 'refine_inp_file', }
    code = inp_vars['code']
    if code == 'cp2k':
        prep_cp2k(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'])
    elif code == "vasp":
        prep_vasp(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'], inp_vars['pbc_cell'],
                  inp_vars['potcar_dir'])
    # elif: inp_vars['code'] == 'Other codes here'

    # Submit/run Jobs: each known scheduler maps to its status command, the
    # patterns it prints per state, and its submission command.
    q_sys_cfg = {
        'sge': ("qstat | grep %s | awk '{print $5}'",
                {'r': 'r', 'p': 'qw', 'f': ''},
                'qsub -N %s %s'),
        'lsf': ("bjobs -w | grep %s | awk '{print $3}'",
                {'r': 'RUN', 'p': 'PEND', 'f': ''},
                'bsub -J %s < %s'),
        'irene': ("ccc_mstat | grep %s | awk '{print $10}' | cut -c1",
                  {'r': 'R', 'p': 'P', 'f': ''},
                  'ccc_msub -r %s %s'),
    }
    if batch_q_sys in q_sys_cfg:
        stat_cmd, stat_dict, sub_cmd = q_sys_cfg[batch_q_sys]
        submit_jobs(run_type, sub_cmd, inp_vars['subm_script'], stat_cmd,
                    stat_dict, inp_vars['max_jobs'], inp_vars['project_name'])
    elif batch_q_sys == 'local':
        pass  # TODO implement local
    elif not batch_q_sys:
        pass
    else:
        err_msg = "Unknown value for 'batch_q_sys'."
        logger.error(err_msg)
        raise ValueError(err_msg)