dockonsurf / modules / calculation.py @ 5412d6ef
Historique | Voir | Annoter | Télécharger (9,03 ko)
1 |
import os |
---|---|
2 |
import logging |
3 |
|
4 |
logger = logging.getLogger('DockOnSurf')
|
5 |
|
6 |
|
7 |
def check_finished_calcs(run_type, code):
    """Sorts the configurations of a run into finished and unfinished ones.

    Only the sub-directories of run_type whose name contains 'conf_' are
    examined. For the 'cp2k' code a configuration is reported as finished
    normally when its directory holds exactly one '*.out' file and exactly one
    '*-1.restart' file, and the text returned by the project's tail helper for
    the output file contains "PROGRAM STOPPED IN". Every other examined
    configuration is reported as finished abnormally. When code is anything
    other than 'cp2k' no configuration is added to either list.

    @param run_type: The type of calculation to check. It is also used as the
        path of the directory holding the configuration sub-directories.
    @param code: The code used for the specified job.
    @return finished_calcs: List of calculations that have finished normally.
    @return unfinished_calcs: List of calculations that have finished
        abnormally.
    """
    from glob import glob
    from modules.utilities import tail

    finished_calcs, unfinished_calcs = [], []
    for conf in os.listdir(run_type):
        conf_dir = f'{run_type}/{conf}'
        if 'conf_' not in conf or not os.path.isdir(conf_dir):
            continue
        if code != 'cp2k':
            continue

        outputs = glob(f"{conf_dir}/*.out")
        restarts = glob(f"{conf_dir}/*-1.restart")

        # A missing output or restart file means the job never completed.
        # TODO specify separately out and restart
        if not outputs or not restarts:
            unfinished_calcs.append(conf)
            continue

        # Ambiguous directories cannot be judged; report them and move on.
        if len(outputs) > 1 or len(restarts) > 1:
            logger.warning(
                f'There is more than one file matching the {code} '
                f'pattern for finished calculation (*.out / '
                f'*-1.restart) in {run_type}/{conf}: '
                f'{outputs, restarts}. '
                f'Skipping directory.')
            unfinished_calcs.append(conf)
            continue

        with open(outputs[0], 'rb') as out_fh:
            ended_normally = "PROGRAM STOPPED IN" in tail(out_fh)
        if ended_normally:
            finished_calcs.append(conf)
        else:
            unfinished_calcs.append(conf)
    return finished_calcs, unfinished_calcs
|
44 |
|
45 |
|
46 |
def prep_cp2k(inp_file, run_type, atms_list):  # TODO name to PROJECT_NAME
    """Prepares the directories to run calculations with CP2K.

    The CP2K input is parsed and, when its first FORCE_EVAL section does not
    specify 'COORD_FILE_NAME', 'coord.xyz' is set and the completed input is
    written to the current working directory under the base name of inp_file.
    A directory '<run_type>/conf_<i>' is then created for every entry of
    atms_list, holding a copy of the input file that is actually going to be
    used and the coordinates of that configuration.

    @param inp_file: CP2K Input file to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'. It is used as the name of the parent directory, which
        must already exist.
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @return: None
    """
    from shutil import copy
    import ase.io
    from pycp2k import CP2K
    from modules.utilities import check_bak

    cp2k = CP2K()
    cp2k.parse(inp_file)
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]

    # File that ends up inside every configuration directory. It is the
    # user-provided one unless a completed version has to be generated.
    inp_to_copy = inp_file
    if force_eval.SUBSYS.TOPOLOGY.Coord_file_name is None:
        logger.warning("'COORD_FILE_NAME' not specified on CP2K input. Using\n"
                       "'coord.xyz'. A new CP2K input file with "
                       "the 'COORD_FILE_NAME' variable is created.")
        force_eval.SUBSYS.TOPOLOGY.Coord_file_name = 'coord.xyz'
        new_inp_file = os.path.basename(inp_file)
        check_bak(new_inp_file)
        cp2k.write_input_file(new_inp_file)
        logger.info(f"CP2K input file written to '{new_inp_file}'.")
        # The configurations must receive the completed input; the original
        # one may live in another directory and still lack the variable.
        inp_to_copy = new_inp_file

    coord_file = force_eval.SUBSYS.TOPOLOGY.Coord_file_name

    # Creating and setting up directories for every configuration.
    for i, conf in enumerate(atms_list):
        conf_dir = f'{run_type}/conf_{i}'
        os.mkdir(conf_dir)
        copy(inp_to_copy, f'{conf_dir}/')
        ase.io.write(f'{conf_dir}/{coord_file}', conf)
|
78 |
|
79 |
|
80 |
def get_jobs_status(job_ids, stat_cmd, stat_dict):
    """Queries the status of every job and translates it to 'r', 'p' or 'f'.

    For each job the shell command obtained from 'stat_cmd % job' is run and
    its standard output, decoded as UTF-8 and stripped of surrounding
    whitespace, is compared for equality against the patterns of stat_dict in
    the order 'r', 'p', 'f'. A job whose output matches none of them only
    triggers a warning and contributes nothing to the returned list, which can
    therefore be shorter than job_ids.

    @param job_ids: list of all jobs to be checked their status.
    @param stat_cmd: Command to check job status. It must contain a single
        '%'-style placeholder for the job identifier.
    @param stat_dict: Dictionary with pairs of job status (r, p, f) and the
        pattern it matches in the output of the stat_cmd.
    @return: list of status for every job with a recognized status.
    """
    from subprocess import PIPE, Popen

    def query(job):
        # Run the status command and hand back its cleaned-up output.
        process = Popen(stat_cmd % job, shell=True, stdout=PIPE)
        raw_output, _ = process.communicate()
        return raw_output.decode('utf-8').strip()

    status_list = []
    for job in job_ids:
        reported = query(job)
        # Patterns are looked up lazily and in priority order.
        for label in ('r', 'p', 'f'):
            if stat_dict[label] == reported:
                status_list.append(label)
                break
        else:
            logger.warning(f'Unrecognized job status: {job}')
    return status_list
|
104 |
|
105 |
|
106 |
def submit_jobs(run_type, sub_cmd, sub_script, stat_cmd, stat_dict, max_jobs,
                name):
    """Submits jobs to a custom queuing system with the provided script

    One job is submitted for every 'conf_*' sub-directory of run_type; any
    other entry of that directory is ignored. Before each submission the
    function waits, polling every 30 seconds, until the number of running
    and/or pending jobs is below the limits in max_jobs. Once everything has
    been submitted it blocks until all the tracked jobs are reported as
    finished.

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_cmd: Bash command used to submit jobs. It must contain two
        '%s' placeholders: the job name and the submission script.
    @param sub_script: script for the job submission. It is copied into every
        configuration directory and submitted from there.
    @param stat_cmd: Bash command to check job status.
    @param stat_dict: Dictionary with pairs of job status: r, p, f (ie. running
        pending and finished) and the pattern it matches in the output of the
        stat_cmd.
    @param max_jobs: dict: Contains the maximum number of jobs to be both
        running, pending/queued and pending+running (keys 'r', 'p' and 'rp').
        When the relevant maximum is reached no jobs more are submitted.
    @param name: name of the project.
    """
    from shutil import copy
    from time import sleep
    from subprocess import PIPE, Popen

    poll_interval = 30  # seconds between two consecutive queue checks

    subm_jobs = []

    def queue_is_full():
        # Query the queue once so that all limits are checked against the
        # same snapshot and no redundant status commands are spawned.
        statuses = get_jobs_status(subm_jobs, stat_cmd, stat_dict)
        running = statuses.count("r")
        pending = statuses.count("p")
        return (running + pending >= max_jobs['rp']
                or running >= max_jobs['r']
                or pending >= max_jobs['p'])

    # The script is submitted from inside the configuration directory, where
    # only its copy (stored under its base name) is guaranteed to exist.
    script_name = os.path.basename(sub_script)

    for conf in os.listdir(run_type):
        conf_dir = f"{run_type}/{conf}"
        # Same filter as check_finished_calcs: skip anything that is not a
        # configuration directory instead of failing on it.
        if not os.path.isdir(conf_dir) or 'conf_' not in conf:
            continue
        i = conf.split('_')[1]

        while queue_is_full():
            sleep(poll_interval)

        copy(sub_script, conf_dir)
        job_name = f'{name[:5]}{run_type[:3].capitalize()}{i}'
        sub_order = sub_cmd % (job_name, script_name)
        # Running the command with cwd= leaves the working directory of this
        # process untouched, even when the submission raises.
        subm_msg = Popen(sub_order, shell=True, stdout=PIPE,
                         cwd=conf_dir).communicate()[0]

        # The job id is taken as the first integer-like word of the message
        # printed by the submission command (angle brackets are ignored).
        job_id = None
        for word in subm_msg.decode("utf-8").split():
            try:
                job_id = int(word.replace('>', '').replace('<', ''))
                break
            except ValueError:
                continue
        if job_id is None:
            logger.warning(f"Could not get the job id of '{job_name}' from "
                           f"the output of '{sub_order}'. Its status will "
                           f"not be tracked.")
            continue
        subm_jobs.append(job_id)

    logger.info('All jobs have been submitted, waiting for them to finish.')
    while not all(stat == 'f' for stat in
                  get_jobs_status(subm_jobs, stat_cmd, stat_dict)):
        sleep(poll_interval)
    logger.info('All jobs have finished.')
|
157 |
|
158 |
|
159 |
def run_calc(run_type, inp_vars, atms_list):
    """Directs the calculation run according to the provided arguments.

    A directory named after run_type is created (after calling the project's
    check_bak helper on that name), the per-configuration directories are
    prepared for the requested code and, unless it is a dry run, the jobs are
    submitted to the requested batch queuing system.

    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement' (case-insensitive). The value is used as given for the
        name of the directory.
    @param inp_vars: Calculation parameters from input file. The keys read
        here are 'batch_q_sys', 'code', the CP2K input file of the relevant
        run type ('isol_inp_file', 'screen_inp_file' or 'refine_inp_file')
        and, when submitting, 'subm_script', 'max_jobs' and 'project_name'.
    @param atms_list: List of ase.Atoms objects containing the sets of atoms
        aimed to run the calculations of.
    @raise ValueError: If run_type is not one of the accepted values.
    """
    from modules.utilities import check_bak

    run_types = ['isolated', 'screening', 'refinement']
    if not isinstance(run_type, str) or run_type.lower() not in run_types:
        run_type_err = f"'run_type' must be one of the following: {run_types}"
        logger.error(run_type_err)
        raise ValueError(run_type_err)
    # The validation above is case-insensitive, so the dispatch below has to
    # be too; otherwise e.g. 'Screening' would be accepted and then ignored.
    run_kind = run_type.lower()

    batch_q_sys = inp_vars['batch_q_sys']
    if batch_q_sys:
        logger.info(f"Running {run_type} calculation with {inp_vars['code']} on"
                    f" {inp_vars['batch_q_sys']}.")
    else:
        logger.info(f"Doing a dry run of {run_type}.")
    check_bak(run_type)
    os.mkdir(run_type)

    # Prepare directories and files for relevant code.
    if inp_vars['code'] == 'cp2k':
        inp_file_keys = {'isolated': 'isol_inp_file',
                         'screening': 'screen_inp_file',
                         'refinement': 'refine_inp_file'}
        prep_cp2k(inp_vars[inp_file_keys[run_kind]], run_type, atms_list)
    # elif: inp_vars['code'] == 'Other codes here'

    # Submit/run Jobs
    if not batch_q_sys:
        return  # Dry run: directories are prepared but nothing is submitted.
    if batch_q_sys == 'local':
        return  # TODO implement local

    # For every supported queuing system: submission command, command to
    # query the status of a job and the output of the latter that
    # corresponds to a running (r), pending (p) and finished (f) job.
    queue_settings = {
        'sge': ('qsub -N %s %s',
                "qstat | grep %s | awk '{print $5}'",
                {'r': 'r', 'p': 'qw', 'f': ''}),
        'lsf': ('bsub -J %s < %s',
                "bjobs -w | grep %s | awk '{print $3}'",
                {'r': 'RUN', 'p': 'PEND', 'f': ''}),
        'irene': ('ccc_msub -r %s %s',
                  "ccc_mstat | grep %s | awk '{print $10}' | cut -c1",
                  {'r': 'R', 'p': 'P', 'f': ''}),
    }
    settings = queue_settings.get(batch_q_sys)
    if settings is None:
        logger.warning(f"Unsupported batch queuing system '{batch_q_sys}'. "
                       f"No jobs have been submitted.")
        return
    sub_cmd, stat_cmd, stat_dict = settings
    submit_jobs(run_type, sub_cmd, inp_vars['subm_script'], stat_cmd,
                stat_dict, inp_vars['max_jobs'], inp_vars['project_name'])
|