Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / calculation.py @ 556b41aa

Historique | Voir | Annoter | Télécharger (13,6 ko)

1
import os
2
import logging
3

    
4
logger = logging.getLogger('DockOnSurf')
5

    
6

    
7
def check_finished_calcs(run_type, code):
    """Returns two lists of calculations finished normally and abnormally.

    @param run_type: The type of calculation to check.
    @param code: The code used for the specified job.
    @return finished_calcs: List of calculations that have finished normally.
    @return unfinished_calcs: List of calculations that have finished abnormally
    @raise NotImplementedError: if the check is not implemented for 'code'.
    """
    # NOTE: the docstring must come first in the body; the original placed an
    # import before it, which silently demoted it to a plain string statement.
    from glob import glob
    import ase.io
    from modules.utilities import tail, _human_key

    finished_calcs = []
    unfinished_calcs = []
    for conf in sorted(os.listdir(run_type), key=_human_key):
        # Only per-configuration sub-directories (named 'conf_<i>') matter.
        if not os.path.isdir(f'{run_type}/{conf}') or 'conf_' not in conf:
            continue
        if code == 'cp2k':
            out_file_list = glob(f"{run_type}/{conf}/*.out")
            restart_file_list = glob(f"{run_type}/{conf}/*-1.restart")
            if len(out_file_list) == 0 or len(restart_file_list) == 0:
                unfinished_calcs.append(conf)
            elif len(out_file_list) > 1 or len(restart_file_list) > 1:
                warn_msg = f'There is more than one file matching the {code} ' \
                           f'pattern for finished calculation (*.out / ' \
                           f'*-1.restart) in {run_type}/{conf}: ' \
                           f'{out_file_list, restart_file_list}. ' \
                           f'Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf)
            else:
                # A normally terminated CP2K run ends its output with the
                # "PROGRAM STOPPED IN" banner.
                with open(out_file_list[0], 'rb') as out_fh:
                    if "PROGRAM STOPPED IN" not in tail(out_fh):
                        unfinished_calcs.append(conf)
                    else:
                        finished_calcs.append(conf)
        elif code == 'vasp':
            out_file_list = glob(f"{run_type}/{conf}/OUTCAR")
            if len(out_file_list) == 0:
                unfinished_calcs.append(conf)
            elif len(out_file_list) > 1:
                # Fixed message: the VASP pattern is OUTCAR, not the CP2K one.
                warn_msg = f'There is more than one file matching the {code} ' \
                           f'pattern for finished calculation (OUTCAR) ' \
                           f'in {run_type}/{conf}: ' \
                           f'{out_file_list}. Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf)
            else:
                # An unreadable/truncated OUTCAR means the run died midway.
                try:
                    ase.io.read(f"{run_type}/{conf}/OUTCAR")
                except (ValueError, IndexError):
                    unfinished_calcs.append(conf)
                    continue
                # A normally terminated VASP run prints the timing summary.
                with open(f"{run_type}/{conf}/OUTCAR", 'rb') as out_fh:
                    if "General timing and accounting" not in tail(out_fh):
                        unfinished_calcs.append(conf)
                    else:
                        finished_calcs.append(conf)
        else:
            err_msg = f"Check not implemented for '{code}'."
            logger.error(err_msg)
            raise NotImplementedError(err_msg)
    return finished_calcs, unfinished_calcs
74

    
75

    
76
def prep_cp2k(inp_file: str, run_type: str, atms_list: list, proj_name: str):
    """Prepares the directories to run calculations with CP2K.

    @param inp_file: CP2K Input file to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project
    @return: None
    """
    from shutil import copy
    from pycp2k import CP2K
    from modules.utilities import check_bak

    if not isinstance(inp_file, str):
        err_msg = "'inp_file' must be a string with the path of the CP2K " \
                  "input file."
        logger.error(err_msg)
        raise ValueError(err_msg)

    # Parse the template input and stamp it with the project/run-type name.
    cp2k_inp = CP2K()
    cp2k_inp.parse(inp_file)
    cp2k_inp.CP2K_INPUT.GLOBAL.Project_name = f"{proj_name}_{run_type}"
    topology = cp2k_inp.CP2K_INPUT.FORCE_EVAL_list[0].SUBSYS.TOPOLOGY
    if topology.Coord_file_name is None:
        logger.warning("'COORD_FILE_NAME' not specified on CP2K input. Using\n"
                       "'coord.xyz'. A new CP2K input file with "
                       "the 'COORD_FILE_NAME' variable is created.")
        topology.Coord_file_name = 'coord.xyz'
        check_bak(inp_file.split('/')[-1])
    cp2k_inp.write_input_file(inp_file.split('/')[-1])

    coord_file = topology.Coord_file_name

    # One sub-directory per configuration, holding the input and coordinates.
    for idx, atoms in enumerate(atms_list):
        conf_dir = f'{run_type}/conf_{idx}/'
        os.mkdir(conf_dir)
        copy(inp_file, conf_dir)
        atoms.write(conf_dir + coord_file)
114

    
115

    
116
def prep_vasp(inp_files, run_type, atms_list, proj_name, cell):
    """Prepares the directories to run calculations with VASP.

    @param inp_files: VASP Input files to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project.
    @param cell: Cell for the Periodic Boundary Conditions.
    @return: None
    @raise ValueError: if 'inp_files' is not a list of strings, or if no
        usable cell is defined for a configuration.
    @raise FileNotFoundError: if a mandatory input file is missing.
    """
    from shutil import copy
    import os

    mand_files = ["INCAR", "KPOINTS", "POTCAR"]
    # Check that 'inp_files' is a list of file names/paths.
    # Fixed: the original tested `not isinstance(...) and all(...)` (never true
    # for a non-list, since `all` over a non-iterable would raise anyway) and
    # built the ValueError without raising it.
    if not isinstance(inp_files, list) \
            or not all(isinstance(inp_file, str) for inp_file in inp_files):
        err_msg = "'inp_files' should be a list of file names/paths"
        logger.error(err_msg)
        raise ValueError(err_msg)
    # Check that all mandatory files are defined
    if any(not any(mand_file in inp_file.split("/")[-1]
                   for inp_file in inp_files) for mand_file in mand_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not specified."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)
    # Check that the defined files exist
    if any(not os.path.isfile(inp_file) for inp_file in inp_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not found."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)

    # Heavy imports only after the cheap validation has passed, so bad input
    # fails fast.
    import numpy as np
    from pymatgen.io.vasp.inputs import Incar

    # Read the INCAR template once and tag it with the project/run-type name.
    incar = ""
    for inp_file in inp_files:
        if "INCAR" in inp_file.split("/")[-1]:
            incar = Incar.from_file(inp_file)
            incar["SYSTEM"] = proj_name + "_" + run_type

    for c, conf in enumerate(atms_list):
        subdir = f'{run_type}/conf_{c}/'
        os.mkdir(subdir)
        for inp_file in inp_files:
            file_name = inp_file.split("/")[-1]
            if "INCAR" in file_name:
                # Write the INCAR carrying the updated SYSTEM tag.
                incar.write_file(subdir + "INCAR")
            elif "KPOINTS" in file_name and "KPOINTS" != file_name:
                # Rename e.g. 'KPOINTS_gamma' to the canonical 'KPOINTS'.
                copy(inp_file, subdir + "KPOINTS")
            elif "POTCAR" in file_name and "POTCAR" != file_name:
                copy(inp_file, subdir + "POTCAR")
            else:
                copy(inp_file, subdir)
        # Apply the user-provided cell if it is non-singular; otherwise the
        # configuration must already carry a valid cell of its own.
        if cell is not False and np.linalg.det(cell) != 0.0:
            conf.pbc = True
            conf.cell = cell
            conf.center()
        elif np.linalg.det(conf.cell) == 0:
            err_msg = "Cell is not defined"
            logger.error(err_msg)
            raise ValueError(err_msg)
        conf.write(subdir + "POSCAR", format="vasp")
182

    
183

    
184
def get_jobs_status(job_ids, stat_cmd, stat_dict):
    """Returns a list of job status for a list of job ids.

    @param job_ids: list of all jobs to be checked their status.
    @param stat_cmd: Command to check job status.
    @param stat_dict: Dictionary with pairs of job status (r, p, f) and the
        pattern it matches in the output of the stat_cmd.
    @return: list of status for every job.
    """
    from subprocess import PIPE, Popen
    status_list = []
    for job in job_ids:
        # Run the (shell) status command with the job id substituted in and
        # take its stripped stdout as the status pattern.
        proc = Popen(stat_cmd % job, shell=True, stdout=PIPE)
        stat_msg = proc.communicate()[0].decode('utf-8').strip()
        # Match against the known codes in priority order: running, pending,
        # finished. Unmatched output is logged and contributes no entry.
        for status in ('r', 'p', 'f'):
            if stat_dict[status] == stat_msg:
                status_list.append(status)
                break
        else:
            logger.warning(f'Unrecognized job {job} status: {stat_msg}')
    return status_list
207

    
208

    
209
def submit_jobs(run_type, sub_cmd, sub_script, stat_cmd, stat_dict, max_jobs,
                name):
    """Submits jobs to a custom queuing system with the provided script

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_cmd: Bash command used to submit jobs.
    @param sub_script: script for the job submission.
    @param stat_cmd: Bash command to check job status.
    @param stat_dict: Dictionary with pairs of job status: r, p, f (ie. running
        pending and finished) and the pattern it matches in the output of the
        stat_cmd.
    @param max_jobs: dict: Contains the maximum number of jobs to be both
        running, pending/queued and pending+running. When the relevant maximum
        is reached no jobs more are submitted.
    @param name: name of the project.
    """
    from shutil import copy
    from time import sleep
    from subprocess import PIPE, Popen
    from modules.utilities import _human_key
    subm_jobs = []
    init_dir = os.getcwd()
    for conf in sorted(os.listdir(run_type), key=_human_key):
        i = conf.split('_')[1]
        # Throttle submission: poll the queue ONCE per iteration (the original
        # evaluated get_jobs_status up to four times per check, spawning one
        # subprocess per already-submitted job each time) and wait until the
        # running/pending counts are below every configured limit.
        while True:
            statuses = get_jobs_status(subm_jobs, stat_cmd, stat_dict)
            n_run = statuses.count("r")
            n_pend = statuses.count("p")
            if (n_run + n_pend < max_jobs['rp'] and n_run < max_jobs['r']
                    and n_pend < max_jobs['p']):
                break
            sleep(30)
        copy(sub_script, f"{run_type}/{conf}")
        os.chdir(f"{run_type}/{conf}")
        job_name = f'{name[:5]}{run_type[:3].capitalize()}{i}'
        sub_order = sub_cmd % (job_name, sub_script)
        subm_msg = Popen(sub_order, shell=True, stdout=PIPE).communicate()[0]
        # The scheduler echoes the job id somewhere in its reply; take the
        # first whitespace token that parses as an integer (LSF wraps the id
        # in angle brackets, hence the '<'/'>' stripping).
        job_id = None
        for word in subm_msg.decode("utf-8").split():
            try:
                job_id = int(word.replace('>', '').replace('<', ''))
                break
            except ValueError:
                continue
        subm_jobs.append(job_id)
        os.chdir(init_dir)

    logger.info('All jobs have been submitted, waiting for them to finish.')
    # Block until every submitted job reports the 'finished' pattern.
    while not all(stat == 'f' for stat in
                  get_jobs_status(subm_jobs, stat_cmd, stat_dict)):
        sleep(30)
    logger.info('All jobs have finished.')
261

    
262

    
263
def run_calc(run_type, inp_vars, atms_list):
    """Directs the calculation run according to the provided arguments.

    @param run_type: Type of calculation. 'isolated', 'screening' or
    'refinement'
    @param inp_vars: Calculation parameters from input file.
    @param atms_list: List of ase.Atoms objects containing the sets of atoms
    aimed to run the calculations of.
    """
    from modules.utilities import check_bak

    run_types = ['isolated', 'screening', 'refinement']
    if not isinstance(run_type, str) or run_type.lower() not in run_types:
        run_type_err = f"'run_type' must be one of the following: {run_types}"
        logger.error(run_type_err)
        raise ValueError(run_type_err)

    batch_q_sys = inp_vars['batch_q_sys']
    if batch_q_sys:
        logger.info(f"Running {run_type} calculation with {inp_vars['code']} on"
                    f" {batch_q_sys}.")
    else:
        logger.info(f"Doing a dry run of {run_type}.")
    check_bak(run_type)
    os.mkdir(run_type)

    # Prepare directories and files for relevant code.
    input_files = {'isolated': 'isol_inp_file', 'screening': 'screen_inp_file',
                   'refinement': 'refine_inp_file', }
    if inp_vars['code'] == 'cp2k':
        prep_cp2k(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'])
    elif inp_vars['code'] == "vasp":
        prep_vasp(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'], inp_vars['pbc_cell'])
    # elif: inp_vars['code'] == 'Other codes here'

    # Submit/run Jobs: per-scheduler (submission command, status command,
    # status-pattern dictionary for running/pending/finished).
    q_sys_cfg = {
        'sge': ('qsub -N %s %s',
                "qstat | grep %s | awk '{print $5}'",
                {'r': 'r', 'p': 'qw', 'f': ''}),
        'lsf': ('bsub -J %s < %s',
                "bjobs -w | grep %s | awk '{print $3}'",
                {'r': 'RUN', 'p': 'PEND', 'f': ''}),
        'irene': ('ccc_msub -r %s %s',
                  "ccc_mstat | grep %s | awk '{print $10}' | cut -c1",
                  {'r': 'R', 'p': 'P', 'f': ''}),
    }
    if batch_q_sys in q_sys_cfg:
        sub_cmd, stat_cmd, stat_dict = q_sys_cfg[batch_q_sys]
        submit_jobs(run_type, sub_cmd, inp_vars['subm_script'], stat_cmd,
                    stat_dict, inp_vars['max_jobs'], inp_vars['project_name'])
    elif batch_q_sys == 'local':
        pass  # TODO implement local
    elif not batch_q_sys:
        pass
    else:
        err_msg = "Unknown value for 'batch_q_sys'."
        logger.error(err_msg)
        raise ValueError(err_msg)