Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / calculation.py @ cf980c86

Historique | Voir | Annoter | Télécharger (15,68 ko)

1
"""Module for the submission or execution of the calculations to be done
2

3
functions:
4
check_finished_calcs: Checks if the calculations finished normally or not.
5
prep_cp2k: Prepares the directories to run calculations with CP2K.
6
prep_vasp: Prepares the directories to run calculations with VASP.
7
get_jobs_status: Returns a list of job status for a list of job ids.
8
submit_jobs: Submits jobs to a custom queuing system with the provided script
9
run_calc: Directs calculation run/submission.
10
"""
11

    
12
import os
13
import logging
14

    
15
logger = logging.getLogger('DockOnSurf')
16

    
17

    
18
def check_finished_calcs(run_type, code):
    """Checks if the calculations finished normally or not.

    Scans the 'conf_*' subdirectories of `run_type` and classifies each one
    according to the termination footprint of the given `code`.

    @param run_type: The type of calculation to check ('isolated',
        'screening' or 'refinement'); also the directory that is scanned.
    @param code: The code used for the specified job ('cp2k' or 'vasp').
    @return finished_calcs: List of calculation directories that have finished
        normally.
    @return unfinished_calcs: List of calculation directories that have
        finished abnormally.
    @raise NotImplementedError: If the check is not implemented for `code`.
    """
    from modules.utilities import _human_key
    from glob import glob
    import ase.io
    from modules.utilities import tail, is_binary

    finished_calcs = []
    unfinished_calcs = []
    for conf_dir in sorted(os.listdir(run_type), key=_human_key):
        conf_path = f'{run_type}/{conf_dir}/'
        # Only 'conf_*' subdirectories are calculation directories.
        if not os.path.isdir(conf_path) or 'conf_' not in conf_dir:
            continue
        if code == 'cp2k':
            # Exactly one '*-1.restart' file marks a CP2K run in progress or
            # done; zero or several means something went wrong.
            restart_file_list = glob(f"{conf_path}/*-1.restart")
            if len(restart_file_list) == 0:
                logger.warning(f"No *-1.restart file found on {conf_path}.")
                unfinished_calcs.append(conf_dir)
                continue
            elif len(restart_file_list) > 1:
                warn_msg = f'There is more than one CP2K restart file ' \
                           f'(*-1.restart) in {conf_path}: ' \
                           f'{restart_file_list}. Skipping directory.'
                unfinished_calcs.append(conf_dir)
                logger.warning(warn_msg)
                continue
            # An output file is recognized by the normal-termination banner
            # "PROGRAM STOPPED IN" appearing exactly once near its end.
            out_files = []
            for file in os.listdir(conf_path):
                if is_binary(conf_path + file):
                    continue
                with open(conf_path + file, "rb") as out_fh:
                    tail_out_str = tail(out_fh)
                if tail_out_str.count("PROGRAM STOPPED IN") == 1:
                    out_files.append(file)
            if len(out_files) > 1:
                warn_msg = f'There is more than one CP2K output file in ' \
                           f'{conf_path}: {out_files}. Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            elif len(out_files) == 0:
                warn_msg = f'There is no CP2K output file in {conf_path}. ' \
                           'Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            else:
                finished_calcs.append(conf_dir)
        elif code == 'vasp':
            out_file_list = glob(f"{conf_path}/OUTCAR")
            if len(out_file_list) == 0:
                unfinished_calcs.append(conf_dir)
            elif len(out_file_list) > 1:
                # Fixed message: the pattern checked here is 'OUTCAR', not
                # the CP2K '*.out / *-1.restart' patterns.
                warn_msg = f'There is more than one file matching the {code} ' \
                           f'pattern for finished calculation (OUTCAR) in ' \
                           f'{conf_path}: {out_file_list}. ' \
                           f'Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            else:
                # An unreadable OUTCAR means the run died before writing a
                # full ionic step.
                try:
                    ase.io.read(f"{conf_path}/OUTCAR")
                except ValueError:
                    unfinished_calcs.append(conf_dir)
                    continue
                except IndexError:
                    unfinished_calcs.append(conf_dir)
                    continue
                # VASP prints the timing block only on normal termination.
                with open(f"{conf_path}/OUTCAR", 'rb') as out_fh:
                    if "General timing and accounting" not in tail(out_fh):
                        unfinished_calcs.append(conf_dir)
                    else:
                        finished_calcs.append(conf_dir)
        else:
            err_msg = f"Check not implemented for '{code}'."
            logger.error(err_msg)
            raise NotImplementedError(err_msg)
    return finished_calcs, unfinished_calcs
102

    
103

    
104
def prep_cp2k(inp_file: str, run_type: str, atms_list: list, proj_name: str):
    """Prepares the directories to run calculations with CP2K.

    @param inp_file: CP2K Input file to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project
    @return: None
    @raise ValueError: If `inp_file` is not a string.
    """
    from shutil import copy
    from pycp2k import CP2K
    from modules.utilities import check_bak

    if not isinstance(inp_file, str):
        err_msg = "'inp_file' must be a string with the path of the CP2K " \
                  "input file."
        logger.error(err_msg)
        raise ValueError(err_msg)

    # Parse the template input and stamp the project/run name on it.
    cp2k = CP2K()
    cp2k.parse(inp_file)
    cp2k.CP2K_INPUT.GLOBAL.Project_name = f"{proj_name}_{run_type}"
    topology = cp2k.CP2K_INPUT.FORCE_EVAL_list[0].SUBSYS.TOPOLOGY

    # Guarantee a coordinate file name is set; fall back to 'coord.xyz' and
    # back up the original input before rewriting it.
    if topology.Coord_file_name is None:
        logger.warning("'COORD_FILE_NAME' not specified on CP2K input. Using\n"
                       "'coord.xyz'. A new CP2K input file with "
                       "the 'COORD_FILE_NAME' variable is created.")
        topology.Coord_file_name = 'coord.xyz'
        check_bak(inp_file.rsplit('/', 1)[-1])
    new_inp_file = inp_file.rsplit('/', 1)[-1]
    cp2k.write_input_file(new_inp_file)

    coord_file = topology.Coord_file_name

    # One sub-directory per configuration, holding the input and geometry.
    for idx, atoms in enumerate(atms_list):
        conf_dir = f'{run_type}/conf_{idx}/'
        os.mkdir(conf_dir)
        copy(new_inp_file, conf_dir)
        atoms.write(conf_dir + coord_file)
143

    
144

    
145
def prep_vasp(inp_files, run_type, atms_list, proj_name, cell, potcar_dir):
    """Prepares the directories to run calculations with VASP.

    @param inp_files: VASP Input files to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project.
    @param cell: Cell for the Periodic Boundary Conditions.
    @param potcar_dir: Directory to find POTCARs for each element. If falsy,
        a ready-made POTCAR must be supplied in `inp_files`.
    @return: None
    @raise ValueError: If `inp_files` is not a list of strings, or no valid
        cell is available.
    @raise FileNotFoundError: If a mandatory file is missing or not found.
    """
    from shutil import copy
    import os

    import numpy as np
    from pymatgen.io.vasp.inputs import Incar

    # POTCAR must be given as an input file when it cannot be assembled from
    # 'potcar_dir', or when one was explicitly provided anyway.
    if not potcar_dir:
        mand_files = ["INCAR", "KPOINTS", "POTCAR"]
    elif any("POTCAR" in inp_file for inp_file in inp_files):
        mand_files = ["INCAR", "KPOINTS", "POTCAR"]
    else:
        mand_files = ["INCAR", "KPOINTS"]

    # Check that the specified files form a list of names/paths.
    # Bug fixes: the condition used 'and' (so a non-list of strings slipped
    # through) and the ValueError was instantiated but never raised.
    if not isinstance(inp_files, list) or not all(isinstance(inp_file, str)
                                                 for inp_file in inp_files):
        err_msg = "'inp_files' should be a list of file names/paths"
        logger.error(err_msg)
        raise ValueError(err_msg)
    # Check that all mandatory files are defined
    elif any(not any(mand_file in inp_file.split("/")[-1]
                     for inp_file in inp_files) for mand_file in mand_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not specified."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)
    # Check that the defined files exist
    elif any(not os.path.isfile(inp_file) for inp_file in inp_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not found."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)

    # Read the INCAR once and stamp the project/run name on it. INCAR is
    # always in 'mand_files', so 'incar' is guaranteed to be set here.
    incar = ""
    for inp_file in inp_files:
        if "INCAR" in inp_file.split("/")[-1]:
            incar = Incar.from_file(inp_file)
            incar["SYSTEM"] = proj_name + "_" + run_type

    # Builds the directory hierarchy and copies/creates the relevant files
    for c, conf in enumerate(atms_list):
        subdir = f'{run_type}/conf_{c}/'
        os.mkdir(subdir)
        for inp_file in inp_files:
            file_name = inp_file.split("/")[-1]
            if "INCAR" in file_name:
                incar.write_file(subdir + "INCAR")
            elif "KPOINTS" in file_name and "KPOINTS" != file_name:
                # Normalize non-standard names (e.g. 'my.KPOINTS') to the
                # fixed file names VASP expects.
                copy(inp_file, subdir + "KPOINTS")
            elif "POTCAR" in file_name and "POTCAR" != file_name:
                copy(inp_file, subdir + "POTCAR")
            else:
                copy(inp_file, subdir)
        if cell is not False and np.linalg.det(cell) != 0.0:
            conf.pbc = True
            conf.cell = cell
            conf.center()
        elif np.linalg.det(conf.cell) == 0:
            err_msg = "Cell is not defined"
            logger.error(err_msg)
            raise ValueError(err_msg)
        conf.write(subdir + "POSCAR", format="vasp")
        # Assemble POTCAR by concatenating the per-element POTCARs in the
        # order the symbols appear on the first POSCAR line.
        if "POTCAR" not in mand_files and potcar_dir:  # TODO make just once
            with open(subdir + "POSCAR", "r") as poscar_fh:
                grouped_symbols = poscar_fh.readline().split()
            for symbol in grouped_symbols:
                with open(f"{potcar_dir}/{symbol}/POTCAR", "r") as sym_fh, \
                        open(subdir + "POTCAR", "a") as potcar_fh:
                    potcar_fh.write(sym_fh.read())
230

    
231

    
232
def get_jobs_status(job_ids, stat_cmd, stat_dict):
    """Returns a list of job status for a list of job ids.

    @param job_ids: list of all jobs to be checked their status.
    @param stat_cmd: Command to check job status; contains a '%s' placeholder
        that is filled with the job id.
    @param stat_dict: Dictionary with pairs of job status (r, p, f) and the
        pattern it matches in the output of the stat_cmd.
    @return: list of status for every job.
    """
    from subprocess import PIPE, Popen

    status_list = []
    for job in job_ids:
        # Run the status command for this job and normalize its output.
        proc = Popen(stat_cmd % job, shell=True, stdout=PIPE)
        stat_msg = proc.communicate()[0].decode('utf-8').strip()
        # Match against the known patterns in priority order: running,
        # pending, finished.
        for status in ('r', 'p', 'f'):
            if stat_dict[status] == stat_msg:
                status_list.append(status)
                break
        else:
            logger.warning(f'Unrecognized job {job} status: {stat_msg}')
    return status_list
255

    
256

    
257
def submit_jobs(run_type, sub_cmd, sub_script, stat_cmd, stat_dict, max_jobs,
                name):
    """Submits jobs to a custom queuing system with the provided script

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_cmd: Bash command used to submit jobs.
    @param sub_script: script for the job submission.
    @param stat_cmd: Bash command to check job status.
    @param stat_dict: Dictionary with pairs of job status: r, p, f (ie. running
        pending and finished) and the pattern it matches in the output of the
        stat_cmd.
    @param max_jobs: dict: Contains the maximum number of jobs to be both
        running ('r'), pending/queued ('p') and pending+running ('rp'). When
        the relevant maximum is reached no more jobs are submitted.
    @param name: name of the project.
    """
    from shutil import copy
    from time import sleep
    from subprocess import PIPE, Popen
    from modules.utilities import _human_key
    subm_jobs = []
    init_dir = os.getcwd()
    for conf in sorted(os.listdir(run_type), key=_human_key):
        i = conf.split('_')[1]
        # Throttle submission: poll the queue ONCE per cycle and submit only
        # when all three limits allow it. (The original evaluated
        # get_jobs_status -- one subprocess per job -- up to four times per
        # poll; this is the same condition with a single query.)
        while True:
            statuses = get_jobs_status(subm_jobs, stat_cmd, stat_dict)
            running = statuses.count("r")
            pending = statuses.count("p")
            if (running + pending < max_jobs['rp']
                    and running < max_jobs['r']
                    and pending < max_jobs['p']):
                break
            sleep(30)
        copy(sub_script, f"{run_type}/{conf}")
        os.chdir(f"{run_type}/{conf}")
        # Job name: first 5 chars of project + 3-char run type + conf index.
        job_name = f'{name[:5]}{run_type[:3].capitalize()}{i}'
        sub_order = sub_cmd % (job_name, sub_script)
        subm_msg = Popen(sub_order, shell=True, stdout=PIPE).communicate()[0]
        # The job id is taken as the first integer token in the submission
        # message; '<'/'>' are stripped for LSF-style "Job <1234>" output.
        job_id = None
        for word in subm_msg.decode("utf-8").split():
            try:
                job_id = int(word.replace('>', '').replace('<', ''))
                break
            except ValueError:
                continue
        subm_jobs.append(job_id)
        os.chdir(init_dir)

    logger.info('All jobs have been submitted, waiting for them to finish.')
    # Block until every submitted job reports the 'finished' pattern.
    while not all(stat == 'f' for stat in
                  get_jobs_status(subm_jobs, stat_cmd, stat_dict)):
        sleep(30)
    logger.info('All jobs have finished.')
309

    
310

    
311
def run_calc(run_type, inp_vars, atms_list):
    """Directs the calculation run/submission.

    @param run_type: Type of calculation. 'isolated', 'screening' or
    'refinement'
    @param inp_vars: Calculation parameters from input file.
    @param atms_list: List of ase.Atoms objects containing the sets of atoms
    aimed to run the calculations of.
    @raise ValueError: If `run_type` or 'batch_q_sys' has an unknown value.
    """
    from modules.utilities import check_bak

    run_types = ['isolated', 'screening', 'refinement']
    if not isinstance(run_type, str) or run_type.lower() not in run_types:
        run_type_err = f"'run_type' must be one of the following: {run_types}"
        logger.error(run_type_err)
        raise ValueError(run_type_err)

    batch_q_sys = inp_vars['batch_q_sys']
    if batch_q_sys:
        logger.info(f"Running {run_type} calculation with {inp_vars['code']} on"
                    f" {batch_q_sys}.")
    else:
        logger.info(f"Doing a dry run of {run_type}.")
    # Back up any previous run directory before creating a fresh one.
    check_bak(run_type)
    os.mkdir(run_type)

    # Prepare directories and files for relevant code.
    input_files = {'isolated': 'isol_inp_file', 'screening': 'screen_inp_file',
                   'refinement': 'refine_inp_file', }
    if inp_vars['code'] == 'cp2k':
        prep_cp2k(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'])
    elif inp_vars['code'] == "vasp":
        prep_vasp(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'], inp_vars['pbc_cell'],
                  inp_vars['potcar_dir'])
    # TODO Implement code  == none
    # elif: inp_vars['code'] == 'Other codes here'

    # Submit/run Jobs. Per queuing system: (submission command,
    # status command, status-pattern dictionary).
    queue_cfgs = {
        'sge': ('qsub -N %s %s',
                "qstat | grep %s | awk '{print $5}'",
                {'r': 'r', 'p': 'qw', 'f': ''}),
        'lsf': ('bsub -J %s < %s',
                "bjobs -w | grep %s | awk '{print $3}'",
                {'r': 'RUN', 'p': 'PEND', 'f': ''}),
        'irene': ('ccc_msub -r %s %s',
                  "ccc_mstat | grep %s | awk '{print $10}' | cut -c1",
                  {'r': 'R', 'p': 'P', 'f': ''}),
    }
    if batch_q_sys in queue_cfgs:
        sub_cmd, stat_cmd, stat_dict = queue_cfgs[batch_q_sys]
        submit_jobs(run_type, sub_cmd, inp_vars['subm_script'], stat_cmd,
                    stat_dict, inp_vars['max_jobs'], inp_vars['project_name'])
    elif batch_q_sys == 'local':
        pass  # TODO implement local
    elif not batch_q_sys:
        pass
    else:
        err_msg = "Unknown value for 'batch_q_sys'."
        logger.error(err_msg)
        raise ValueError(err_msg)