dockonsurf / modules / calculation.py @ 19567be2
Historique | Voir | Annoter | Télécharger (15,15 ko)
1 |
import os |
---|---|
2 |
import logging |
3 |
|
4 |
logger = logging.getLogger('DockOnSurf')
|
5 |
|
6 |
|
7 |
def check_finished_calcs(run_type, code):
    """Returns two lists of calculations finished normally and abnormally.

    @param run_type: The type of calculation to check.
    @param code: The code used for the specified job.
    @return finished_calcs: List of calculation directories that have finished
        normally.
    @return unfinished_calcs: List of calculation directories that have
        finished abnormally.
    """
    # Fix: the docstring above must be the first statement of the function;
    # it was previously placed after an import and therefore silently became
    # a no-op string expression (invisible to help()/__doc__).
    from glob import glob

    import ase.io

    from modules.utilities import _human_key, tail, is_binary

    finished_calcs = []
    unfinished_calcs = []
    for conf_dir in sorted(os.listdir(run_type), key=_human_key):
        conf_path = f'{run_type}/{conf_dir}/'
        # Only sub-directories named like 'conf_<n>' are calculation folders.
        if not os.path.isdir(conf_path) or 'conf_' not in conf_dir:
            continue
        if code == 'cp2k':
            restart_file_list = glob(f"{conf_path}/*-1.restart")
            if len(restart_file_list) == 0:
                logger.warning(f"No *-1.restart file found on {conf_path}.")
                unfinished_calcs.append(conf_dir)
                continue
            elif len(restart_file_list) > 1:
                warn_msg = f'There is more than one CP2K restart file ' \
                           f'(*-1.restart / in {conf_path}: ' \
                           f'{restart_file_list}. Skipping directory.'
                unfinished_calcs.append(conf_dir)
                logger.warning(warn_msg)
                continue
            # A CP2K run is considered finished when exactly one readable
            # (non-binary) file in the directory ends with a single
            # 'PROGRAM STOPPED IN' banner.
            out_files = []
            for file in os.listdir(conf_path):
                if is_binary(conf_path + file):
                    continue
                with open(conf_path + file, "rb") as out_fh:
                    tail_out_str = tail(out_fh)
                if tail_out_str.count("PROGRAM STOPPED IN") == 1:
                    out_files.append(file)
            if len(out_files) > 1:
                warn_msg = f'There is more than one CP2K output file in ' \
                           f'{conf_path}: {out_files}. Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            elif len(out_files) == 0:
                warn_msg = f'There is no CP2K output file in {conf_path}. ' \
                           'Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            else:
                finished_calcs.append(conf_dir)
        elif code == 'vasp':
            out_file_list = glob(f"{conf_path}/OUTCAR")
            if len(out_file_list) == 0:
                unfinished_calcs.append(conf_dir)
            elif len(out_file_list) > 1:
                warn_msg = f'There is more than one file matching the {code} ' \
                           f'pattern for finished calculation (*.out / ' \
                           f'*-1.restart) in {conf_path}: ' \
                           f'{out_file_list}. Skipping directory.'
                logger.warning(warn_msg)
                unfinished_calcs.append(conf_dir)
            else:
                # A truncated or corrupt OUTCAR makes ase.io.read() raise;
                # both handlers did the same thing, so they are merged.
                try:
                    ase.io.read(f"{conf_path}/OUTCAR")
                except (ValueError, IndexError):
                    unfinished_calcs.append(conf_dir)
                    continue
                with open(f"{conf_path}/OUTCAR", 'rb') as out_fh:
                    # VASP prints this section only on normal termination.
                    if "General timing and accounting" not in tail(out_fh):
                        unfinished_calcs.append(conf_dir)
                    else:
                        finished_calcs.append(conf_dir)
        else:
            err_msg = f"Check not implemented for '{code}'."
            logger.error(err_msg)
            raise NotImplementedError(err_msg)
    return finished_calcs, unfinished_calcs
|
91 |
|
92 |
|
93 |
def prep_cp2k(inp_file: str, run_type: str, atms_list: list, proj_name: str):
    """Prepares the directories to run calculations with CP2K.

    @param inp_file: CP2K Input file to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project
    @return: None
    @raise ValueError: if inp_file is not a string.
    """
    from shutil import copy

    from pycp2k import CP2K

    from modules.utilities import check_bak

    if not isinstance(inp_file, str):
        err_msg = "'inp_file' must be a string with the path of the CP2K " \
                  "input file."
        logger.error(err_msg)
        raise ValueError(err_msg)
    cp2k = CP2K()
    cp2k.parse(inp_file)
    cp2k.CP2K_INPUT.GLOBAL.Project_name = proj_name + "_" + run_type
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
    if force_eval.SUBSYS.TOPOLOGY.Coord_file_name is None:
        logger.warning("'COORD_FILE_NAME' not specified on CP2K input. Using\n"
                       "'coord.xyz'. A new CP2K input file with "
                       "the 'COORD_FILE_NAME' variable is created.")
        force_eval.SUBSYS.TOPOLOGY.Coord_file_name = 'coord.xyz'
    # Write the (possibly amended) input file into the working directory,
    # backing up any pre-existing file with the same name first.
    # (basename computed once; previously `inp_file.split('/')[-1]` twice)
    new_inp_file = os.path.basename(inp_file)
    check_bak(new_inp_file)
    cp2k.write_input_file(new_inp_file)

    coord_file = force_eval.SUBSYS.TOPOLOGY.Coord_file_name

    # Creating and setting up directories for every configuration.
    for i, conf in enumerate(atms_list):
        subdir = f'{run_type}/conf_{i}/'
        os.mkdir(subdir)
        copy(new_inp_file, subdir)
        conf.write(subdir + coord_file)
132 |
|
133 |
|
134 |
def prep_vasp(inp_files, run_type, atms_list, proj_name, cell, potcar_dir):
    """Prepares the directories to run calculations with VASP.

    @param inp_files: VASP Input files to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @param proj_name: name of the project.
    @param cell: Cell for the Periodic Boundary Conditions.
    @param potcar_dir: Directory to find POTCARs for each element.
    @return: None
    @raise ValueError: if inp_files is not a list of strings, or no cell is
        defined.
    @raise FileNotFoundError: if a mandatory input file is missing.
    """
    from shutil import copy
    import os

    import numpy as np
    from pymatgen.io.vasp.inputs import Incar

    # POTCAR is only mandatory when it cannot be assembled from potcar_dir.
    if not potcar_dir:
        mand_files = ["INCAR", "KPOINTS", "POTCAR"]
    elif any("POTCAR" in inp_file for inp_file in inp_files):
        mand_files = ["INCAR", "KPOINTS", "POTCAR"]
    else:
        mand_files = ["INCAR", "KPOINTS"]

    # Check that inp_files is a list of file names/paths.
    # Fix: the original condition (`and all(...)`) could never describe a bad
    # argument, and the ValueError was instantiated but never raised.
    if not isinstance(inp_files, list) or not all(isinstance(inp_file, str)
                                                 for inp_file in inp_files):
        err_msg = "'inp_files' should be a list of file names/paths"
        logger.error(err_msg)
        raise ValueError(err_msg)
    # Check that all mandatory files are defined
    elif any(not any(mand_file in inp_file.split("/")[-1]
                     for inp_file in inp_files) for mand_file in mand_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not specified."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)
    # Check that the defined files exist
    elif any(not os.path.isfile(inp_file) for inp_file in inp_files):
        err_msg = f"At least one of the mandatory files {mand_files} was " \
                  "not found."
        logger.error(err_msg)
        raise FileNotFoundError(err_msg)

    # Parse the INCAR once and stamp the project/run name on it.
    incar = ""
    for inp_file in inp_files:
        if "INCAR" in inp_file.split("/")[-1]:
            incar = Incar.from_file(inp_file)
            incar["SYSTEM"] = proj_name + "_" + run_type

    for c, conf in enumerate(atms_list):
        subdir = f'{run_type}/conf_{c}/'
        os.mkdir(subdir)
        for inp_file in inp_files:
            file_name = inp_file.split("/")[-1]
            if "INCAR" in file_name:
                incar.write_file(subdir + "INCAR")
            elif "KPOINTS" in file_name and "KPOINTS" != file_name:
                # Normalize e.g. 'my_KPOINTS' to the canonical file name.
                copy(inp_file, subdir + "KPOINTS")
            elif "POTCAR" in file_name and "POTCAR" != file_name:
                copy(inp_file, subdir + "POTCAR")
            else:
                copy(inp_file, subdir)
        if cell is not False and np.linalg.det(cell) != 0.0:
            conf.pbc = True
            conf.cell = cell
            conf.center()
        elif np.linalg.det(conf.cell) == 0:
            err_msg = "Cell is not defined"
            logger.error(err_msg)
            raise ValueError(err_msg)
        conf.write(subdir + "POSCAR", format="vasp")
        if "POTCAR" not in mand_files and potcar_dir:
            # POSCAR's first line lists the grouped chemical symbols;
            # concatenate the per-element POTCARs in that same order.
            with open(subdir + "POSCAR", "r") as poscar_fh:
                grouped_symbols = poscar_fh.readline().split()
            with open(subdir + "POTCAR", "a") as potcar_fh:
                for symbol in grouped_symbols:
                    with open(f"{potcar_dir}/{symbol}/POTCAR", "r") as sym_fh:
                        potcar_fh.write(sym_fh.read())
218 |
|
219 |
|
220 |
def get_jobs_status(job_ids, stat_cmd, stat_dict):
    """Returns a list of job status for a list of job ids.

    @param job_ids: list of all jobs to be checked their status.
    @param stat_cmd: Command to check job status.
    @param stat_dict: Dictionary with pairs of job status (r, p, f) and the
        pattern it matches in the output of the stat_cmd.
    @return: list of status for every job.
    """
    from subprocess import PIPE, Popen

    statuses = []
    for job in job_ids:
        # Query the queuing system for this job and normalize the output.
        proc = Popen(stat_cmd % job, shell=True, stdout=PIPE)
        stat_msg = proc.communicate()[0].decode('utf-8').strip()
        # Match against the known patterns in fixed order: r, then p, then f.
        for status in ('r', 'p', 'f'):
            if stat_dict[status] == stat_msg:
                statuses.append(status)
                break
        else:
            logger.warning(f'Unrecognized job {job} status: {stat_msg}')
    return statuses
|
243 |
|
244 |
|
245 |
def submit_jobs(run_type, sub_cmd, sub_script, stat_cmd, stat_dict, max_jobs,
                name):
    """Submits jobs to a custom queuing system with the provided script

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_cmd: Bash command used to submit jobs.
    @param sub_script: script for the job submission.
    @param stat_cmd: Bash command to check job status.
    @param stat_dict: Dictionary with pairs of job status: r, p, f (ie. running
        pending and finished) and the pattern it matches in the output of the
        stat_cmd.
    @param max_jobs: dict: Contains the maximum number of jobs to be both
        running, pending/queued and pending+running. When the relevant maximum
        is reached no jobs more are submitted.
    @param name: name of the project.
    """
    from shutil import copy
    from time import sleep
    from subprocess import PIPE, Popen

    from modules.utilities import _human_key

    def _queue_full(job_ids):
        """True while any of the r / p / r+p submission limits is reached."""
        # Fix: the original evaluated get_jobs_status() four times per check,
        # i.e. four full queue sweeps whose snapshots could disagree with one
        # another; one query now feeds all three counts.
        statuses = get_jobs_status(job_ids, stat_cmd, stat_dict)
        running = statuses.count('r')
        pending = statuses.count('p')
        return (running + pending >= max_jobs['rp']
                or running >= max_jobs['r']
                or pending >= max_jobs['p'])

    subm_jobs = []
    init_dir = os.getcwd()
    for conf in sorted(os.listdir(run_type), key=_human_key):
        i = conf.split('_')[1]
        # Throttle: wait until the queue has room before submitting more.
        while _queue_full(subm_jobs):
            sleep(30)
        copy(sub_script, f"{run_type}/{conf}")
        os.chdir(f"{run_type}/{conf}")
        job_name = f'{name[:5]}{run_type[:3].capitalize()}{i}'
        # NOTE(review): after chdir, sub_script is resolved relative to the
        # conf directory — it was just copied there, but confirm callers pass
        # a bare file name rather than a path.
        sub_order = sub_cmd % (job_name, sub_script)
        subm_msg = Popen(sub_order, shell=True, stdout=PIPE).communicate()[0]
        # The job id is taken as the first purely-numeric token (possibly
        # wrapped in '<>') of the scheduler's submission message.
        job_id = None
        for word in subm_msg.decode("utf-8").split():
            try:
                job_id = int(word.replace('>', '').replace('<', ''))
                break
            except ValueError:
                continue
        subm_jobs.append(job_id)
        os.chdir(init_dir)

    logger.info('All jobs have been submitted, waiting for them to finish.')
    # Block until every submitted job reports the 'finished' status.
    while not all(stat == 'f' for stat in
                  get_jobs_status(subm_jobs, stat_cmd, stat_dict)):
        sleep(30)
    logger.info('All jobs have finished.')
|
297 |
|
298 |
|
299 |
def run_calc(run_type, inp_vars, atms_list):
    """Directs the calculation run according to the provided arguments.

    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param inp_vars: Calculation parameters from input file.
    @param atms_list: List of ase.Atoms objects containing the sets of atoms
        aimed to run the calculations of.
    @raise ValueError: if run_type or inp_vars['batch_q_sys'] is invalid.
    """
    from modules.utilities import check_bak

    run_types = ['isolated', 'screening', 'refinement']
    if not isinstance(run_type, str) or run_type.lower() not in run_types:
        run_type_err = f"'run_type' must be one of the following: {run_types}"
        logger.error(run_type_err)
        raise ValueError(run_type_err)
    # Fix: validation is case-insensitive, but the value is used below for
    # directory names and dict lookups that expect lower case; a mixed-case
    # run_type previously passed validation and then crashed with KeyError.
    run_type = run_type.lower()

    if inp_vars['batch_q_sys']:
        logger.info(f"Running {run_type} calculation with {inp_vars['code']} on"
                    f" {inp_vars['batch_q_sys']}.")
    else:
        logger.info(f"Doing a dry run of {run_type}.")
    check_bak(run_type)
    os.mkdir(run_type)

    # Prepare directories and files for relevant code.
    input_files = {'isolated': 'isol_inp_file', 'screening': 'screen_inp_file',
                   'refinement': 'refine_inp_file', }
    if inp_vars['code'] == 'cp2k':
        prep_cp2k(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'])
    elif inp_vars['code'] == "vasp":
        prep_vasp(inp_vars[input_files[run_type]], run_type, atms_list,
                  inp_vars['project_name'], inp_vars['pbc_cell'],
                  inp_vars['potcar_dir'])
    # TODO Implement code == none
    # elif: inp_vars['code'] == 'Other codes here'

    # Submit/run Jobs.
    # Per-scheduler configuration: (status command, status patterns,
    # submission command). Collapses three duplicated submit_jobs branches.
    queue_cfgs = {
        'sge': ("qstat | grep %s | awk '{print $5}'",
                {'r': 'r', 'p': 'qw', 'f': ''},
                'qsub -N %s %s'),
        'lsf': ("bjobs -w | grep %s | awk '{print $3}'",
                {'r': 'RUN', 'p': 'PEND', 'f': ''},
                'bsub -J %s < %s'),
        'irene': ("ccc_mstat | grep %s | awk '{print $10}' | cut -c1",
                  {'r': 'R', 'p': 'P', 'f': ''},
                  'ccc_msub -r %s %s'),
    }
    batch_q_sys = inp_vars['batch_q_sys']
    if batch_q_sys in queue_cfgs:
        stat_cmd, stat_dict, sub_cmd = queue_cfgs[batch_q_sys]
        submit_jobs(run_type, sub_cmd, inp_vars['subm_script'],
                    stat_cmd, stat_dict, inp_vars['max_jobs'],
                    inp_vars['project_name'])
    elif batch_q_sys == 'local':
        pass  # TODO implement local
    elif not batch_q_sys:
        pass  # Dry run: directories prepared, nothing submitted.
    else:
        err_msg = "Unknown value for 'batch_q_sys'."
        logger.error(err_msg)
        raise ValueError(err_msg)