dockonsurf / modules / calculation.py @ 365d5b9a

import os
import logging

logger = logging.getLogger('DockOnSurf')


def check_finished_calcs(run_type, code):
    """Returns two lists of calculations finished normally and abnormally.

    @param run_type: The type of calculation to check.
    @param code: The code used for the specified job.
    @return finished_calcs: List of calculations that have finished normally.
    @return unfinished_calcs: List of calculations that have finished
        abnormally.
    """
    from glob import glob
    from modules.utilities import tail

    finished_calcs = []
    unfinished_calcs = []
    for conf in os.listdir(run_type):
        if not os.path.isdir(f'{run_type}/{conf}') or 'conf_' not in conf:
            continue
        if code == 'cp2k':
            out_file_list = glob(f"{run_type}/{conf}/*.out")
            restart_file_list = glob(f"{run_type}/{conf}/*-1.restart")
            if len(out_file_list) == 0 or len(restart_file_list) == 0:
                unfinished_calcs.append(conf)  # TODO specify separately
                # TODO whether the out or the restart file is missing
            elif len(out_file_list) > 1 or len(restart_file_list) > 1:
                warn_msg = f'There is more than one file matching the {code} ' \
                           f'pattern for finished calculation (*.out / ' \
                           f'*-1.restart) in {run_type}/{conf}: ' \
                           f'{out_file_list, restart_file_list}. ' \
                           f'Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf)
            else:
                # A CP2K run that ended normally prints 'PROGRAM STOPPED IN'
                # in the last lines of its output file.
                with open(out_file_list[0], 'rb') as out_fh:
                    if "PROGRAM STOPPED IN" not in tail(out_fh):
                        unfinished_calcs.append(conf)
                    else:
                        finished_calcs.append(conf)
    return finished_calcs, unfinished_calcs
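
# Usage sketch (not part of the original module): after a CP2K screening run,
# the conf_* subdirectories of 'screening' can be split into finished and
# unfinished calculations. The directory layout is assumed to be the one
# prep_cp2k creates below.
#
#     done, not_done = check_finished_calcs('screening', 'cp2k')
#     if not_done:
#         logger.warning(f'Calculations that did not finish: {not_done}')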


def prep_cp2k(inp_file, run_type, atms_list, proj_name):
    """Prepares the directories to run calculations with CP2K.

    @param inp_file: CP2K input file to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'.
    @param atms_list: list of ase.Atoms objects to run the calculations of.
    @param proj_name: name of the project.
    @return: None
    """
    from shutil import copy
    import ase.io
    from pycp2k import CP2K
    from modules.utilities import check_bak
    cp2k = CP2K()
    cp2k.parse(inp_file)
    cp2k.CP2K_INPUT.GLOBAL.Project_name = proj_name + "_" + run_type
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
    if force_eval.SUBSYS.TOPOLOGY.Coord_file_name is None:
        logger.warning("'COORD_FILE_NAME' not specified on CP2K input. Using\n"
                       "'coord.xyz'. A new CP2K input file with "
                       "the 'COORD_FILE_NAME' variable is created.")
        force_eval.SUBSYS.TOPOLOGY.Coord_file_name = 'coord.xyz'
        check_bak(inp_file.split('/')[-1])
    cp2k.write_input_file(inp_file.split('/')[-1])

    coord_file = force_eval.SUBSYS.TOPOLOGY.Coord_file_name

    # Create and set up a directory for every configuration.
    for i, conf in enumerate(atms_list):
        os.mkdir(f'{run_type}/conf_{i}')
        copy(inp_file, f'{run_type}/conf_{i}/')
        ase.io.write(f'{run_type}/conf_{i}/{coord_file}', conf)
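
# Usage sketch (not part of the original module): preparing directories for an
# isolated run from a couple of conformers. The file names 'cp2k_isol.inp',
# 'conf_0.xyz' and 'conf_1.xyz' are illustrative placeholders; the run_type
# directory must already exist (run_calc creates it with os.mkdir).
#
#     import ase.io
#     confs = [ase.io.read('conf_0.xyz'), ase.io.read('conf_1.xyz')]
#     os.mkdir('isolated')
#     prep_cp2k('cp2k_isol.inp', 'isolated', confs, 'my_proj')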


def get_jobs_status(job_ids, stat_cmd, stat_dict):
    """Returns a list of job statuses for a list of job ids.

    @param job_ids: list of ids of the jobs whose status should be checked.
    @param stat_cmd: Command to check job status.
    @param stat_dict: Dictionary with pairs of job status (r, p, f) and the
        pattern each one matches in the output of the stat_cmd.
    @return: list of statuses, one for every job.
    """
    from subprocess import PIPE, Popen
    status_list = []
    for job in job_ids:
        stat_order = stat_cmd % job
        stat_msg = Popen(stat_order, shell=True,
                         stdout=PIPE).communicate()[0].decode('utf-8').strip()
        if stat_dict['r'] == stat_msg:
            status_list.append('r')
        elif stat_dict['p'] == stat_msg:
            status_list.append('p')
        elif stat_dict['f'] == stat_msg:
            status_list.append('f')
        else:
            logger.warning(f'Unrecognized job status: {job}')
    return status_list
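
# Usage sketch (not part of the original module): polling two jobs on SGE with
# the same stat_cmd/stat_dict that run_calc builds below; the job ids are made
# up for illustration.
#
#     sge_stat_cmd = "qstat | grep %s | awk '{print $5}'"
#     sge_stat_dict = {'r': 'r', 'p': 'qw', 'f': ''}
#     get_jobs_status([123456, 123457], sge_stat_cmd, sge_stat_dict)
#     # -> e.g. ['r', 'p'] for one running and one queued job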


def submit_jobs(run_type, sub_cmd, sub_script, stat_cmd, stat_dict, max_jobs,
                name):
    """Submits jobs to a custom queuing system with the provided script.

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_cmd: Bash command used to submit jobs.
    @param sub_script: script for the job submission.
    @param stat_cmd: Bash command to check job status.
    @param stat_dict: Dictionary with pairs of job status: r, p, f (i.e.
        running, pending and finished) and the pattern each one matches in the
        output of the stat_cmd.
    @param max_jobs: dict: Contains the maximum number of jobs allowed to be
        running ('r'), pending/queued ('p') and pending+running ('rp'). When
        the relevant maximum is reached, no more jobs are submitted.
    @param name: name of the project.
    """
    from shutil import copy
    from time import sleep
    from subprocess import PIPE, Popen
    subm_jobs = []
    init_dir = os.getcwd()
    for conf in os.listdir(run_type):
        i = conf.split('_')[1]
        # Poll the queue and wait until a new job would fit under every limit
        # in max_jobs.
        while True:
            status = get_jobs_status(subm_jobs, stat_cmd, stat_dict)
            if status.count("r") + status.count("p") < max_jobs['rp'] \
                    and status.count("r") < max_jobs['r'] \
                    and status.count("p") < max_jobs['p']:
                break
            sleep(30)
        copy(sub_script, f"{run_type}/{conf}")
        os.chdir(f"{run_type}/{conf}")
        job_name = f'{name[:5]}{run_type[:3].capitalize()}{i}'
        sub_order = sub_cmd % (job_name, sub_script)
        subm_msg = Popen(sub_order, shell=True, stdout=PIPE).communicate()[0]
        # Take the job id to be the first integer in the submission message,
        # stripping angle brackets (e.g. LSF prints 'Job <1234> is submitted').
        job_id = None
        for word in subm_msg.decode("utf-8").split():
            try:
                job_id = int(word.replace('>', '').replace('<', ''))
                break
            except ValueError:
                continue
        subm_jobs.append(job_id)
        os.chdir(init_dir)

    logger.info('All jobs have been submitted, waiting for them to finish.')
    while not all(stat == 'f' for stat in
                  get_jobs_status(subm_jobs, stat_cmd, stat_dict)):
        sleep(30)
    logger.info('All jobs have finished.')
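
# Usage sketch (not part of the original module): submitting screening jobs on
# LSF with the commands run_calc builds below. The submission script name and
# the max_jobs limits are illustrative; the 'r', 'p' and 'rp' keys are the
# running, pending and running+pending limits checked by the polling loop.
#
#     submit_jobs('screening', 'bsub -J %s < %s', 'sub_script.sh',
#                 "bjobs -w | grep %s | awk '{print $3}'",
#                 {'r': 'RUN', 'p': 'PEND', 'f': ''},
#                 {'r': 10, 'p': 20, 'rp': 25}, 'my_proj')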


def run_calc(run_type, inp_vars, atms_list):
    """Directs the calculation run according to the provided arguments.

    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'.
    @param inp_vars: Calculation parameters from input file.
    @param atms_list: List of ase.Atoms objects containing the sets of atoms
        on which to run the calculations.
    """
    from modules.utilities import check_bak
    run_types = ['isolated', 'screening', 'refinement']
    if not isinstance(run_type, str) or run_type.lower() not in run_types:
        run_type_err = f"'run_type' must be one of the following: {run_types}"
        logger.error(run_type_err)
        raise ValueError(run_type_err)

    if inp_vars['batch_q_sys']:
        logger.info(f"Running {run_type} calculation with {inp_vars['code']} on"
                    f" {inp_vars['batch_q_sys']}.")
    else:
        logger.info(f"Doing a dry run of {run_type}.")
    check_bak(run_type)
    os.mkdir(run_type)

    # Prepare directories and files for the relevant code.
    input_files = {'isolated': 'isol_inp_file', 'screening': 'screen_inp_file',
                   'refinement': 'refine_inp_file'}
    if inp_vars['code'] == 'cp2k':
        prep_cp2k(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'])
    # elif inp_vars['code'] == 'other_code': prepare other codes here

    # Submit/run jobs.
    if inp_vars['batch_q_sys'] == 'sge':
        stat_cmd = "qstat | grep %s | awk '{print $5}'"
        stat_dict = {'r': 'r', 'p': 'qw', 'f': ''}
        submit_jobs(run_type, 'qsub -N %s %s', inp_vars['subm_script'],
                    stat_cmd, stat_dict, inp_vars['max_jobs'],
                    inp_vars['project_name'])
    elif inp_vars['batch_q_sys'] == 'lsf':
        stat_cmd = "bjobs -w | grep %s | awk '{print $3}'"
        stat_dict = {'r': 'RUN', 'p': 'PEND', 'f': ''}
        submit_jobs(run_type, 'bsub -J %s < %s', inp_vars['subm_script'],
                    stat_cmd, stat_dict, inp_vars['max_jobs'],
                    inp_vars['project_name'])
    elif inp_vars['batch_q_sys'] == 'irene':
        stat_cmd = "ccc_mstat | grep %s | awk '{print $10}' | cut -c1"
        stat_dict = {'r': 'R', 'p': 'P', 'f': ''}
        submit_jobs(run_type, 'ccc_msub -r %s %s', inp_vars['subm_script'],
                    stat_cmd, stat_dict, inp_vars['max_jobs'],
                    inp_vars['project_name'])
    elif inp_vars['batch_q_sys'] == 'local':
        pass  # TODO implement local
    elif not inp_vars['batch_q_sys']:
        pass
        pass