Révision 1d8c374e
b/modules/calculation.py | ||
---|---|---|
17 | 17 |
""" |
18 | 18 |
from glob import glob |
19 | 19 |
import ase.io |
20 |
from pycp2k import CP2K |
|
21 |
from modules.utilities import tail |
|
20 |
from modules.utilities import tail, is_binary |
|
22 | 21 |
|
23 | 22 |
finished_calcs = [] |
24 | 23 |
unfinished_calcs = [] |
25 | 24 |
for conf_dir in sorted(os.listdir(run_type), key=_human_key): |
26 |
conf_path = f'{run_type}/{conf_dir}' |
|
25 |
conf_path = f'{run_type}/{conf_dir}/'
|
|
27 | 26 |
if not os.path.isdir(conf_path) or 'conf_' not in conf_dir: |
28 | 27 |
continue |
29 | 28 |
if code == 'cp2k': |
30 |
cp2k = CP2K() |
|
31 | 29 |
restart_file_list = glob(f"{conf_path}/*-1.restart") |
32 | 30 |
if len(restart_file_list) == 0: |
33 | 31 |
logger.warning(f"No *-1.restart file found on {conf_path}.") |
... | ... | |
42 | 40 |
continue |
43 | 41 |
out_files = [] |
44 | 42 |
for file in os.listdir(conf_path): |
45 |
with open(conf_path+"/"+file, "rb") as out_fh: |
|
43 |
if is_binary(conf_path+file): |
|
44 |
continue |
|
45 |
with open(conf_path+file, "rb") as out_fh: |
|
46 | 46 |
tail_out_str = tail(out_fh) |
47 | 47 |
if tail_out_str.count("PROGRAM STOPPED IN") == 1: |
48 | 48 |
out_files.append(file) |
b/modules/formats.py | ||
---|---|---|
166 | 166 |
force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0] |
167 | 167 |
raw_coords = force_eval.SUBSYS.COORD.Default_keyword |
168 | 168 |
symbols = [atom.split()[0] for atom in raw_coords] |
169 |
positions = np.array([atom.split()[1:] for atom in raw_coords]) |
|
169 |
positions = np.array([[float(coord) for coord in atom.split()[1:]] |
|
170 |
for atom in raw_coords]) |
|
170 | 171 |
if len(spec_atoms) > 0: |
171 | 172 |
add_special_atoms(spec_atoms) |
172 | 173 |
return Atoms(symbols=symbols, positions=positions) |
... | ... | |
190 | 191 |
return ase.io.read(file) |
191 | 192 |
|
192 | 193 |
|
193 |
def collect_coords(conf_list, code, run_type, spec_atms=tuple()): |
|
194 |
"""Directs the reading of coordinates on a set of subdirectories. |
|
195 |
|
|
196 |
Given a dockonsurf directory hierarchy: project/run_type/conf_X |
|
197 |
(run_type = ['isolated', 'screening' or 'refinement']) with finished |
|
198 |
calculations produced by a given code, stored in every conf_X subdirectory, |
|
199 |
it collects the coordinates of the specified conf_X subdirectories in a |
|
200 |
single run_type by calling the adequate function (depending on the code) and |
|
201 |
returns a list of ase.Atoms objects. |
|
202 |
|
|
203 |
@param conf_list: List of directories where to read the coords from. |
|
204 |
@param code: the code that produced the calculation results files. |
|
205 |
@param run_type: the type of calculation (and also the name of the folder) |
|
206 |
containing the calculation subdirectories. |
|
207 |
@param spec_atms: List of tuples containing pairs of new/traditional |
|
208 |
chemical symbols. |
|
209 |
@return: list of ase.Atoms objects. |
|
210 |
""" |
|
211 |
from glob import glob |
|
212 |
atoms_list = [] |
|
213 |
for conf in conf_list: |
|
214 |
if code == 'cp2k': |
|
215 |
atoms_list.append(read_coords_cp2k(glob(f"{run_type}/{conf}/*-1" |
|
216 |
f".restart")[0], spec_atms)) |
|
217 |
elif code == 'vasp': |
|
218 |
atoms_list.append(read_coords_vasp(f"{run_type}/{conf}/OUTCAR", |
|
219 |
spec_atms)) |
|
220 |
else: |
|
221 |
err_msg = f"Collect coordinates not implemented for '{code}'." |
|
222 |
logger.error(err_msg) |
|
223 |
raise NotImplementedError(err_msg) |
|
224 |
return atoms_list |
|
225 |
|
|
226 |
|
|
227 | 194 |
def read_energy_cp2k(file): |
228 |
"""Reads the CP2K out file and returns its final energy. |
|
195 |
"""Reads the CP2K output file and returns its final energy.
|
|
229 | 196 |
|
230 | 197 |
@param file: The file from which the energy should be read. |
231 |
@return: The last energy on the out file. |
|
198 |
@return: The last energy on the out file in eV.
|
|
232 | 199 |
""" |
233 | 200 |
out_fh = open(file, 'r') |
234 | 201 |
energy = None |
... | ... | |
239 | 206 |
return energy |
240 | 207 |
|
241 | 208 |
|
242 |
def collect_energies(conf_list, code, run_type):
|
|
243 |
"""Directs the reading of energies on a set of subdirectories.
|
|
209 |
def collect_confs(dir_list, code, run_type, spec_atms=tuple()):
|
|
210 |
"""Reads the coordinates and energies of a list of finished calculations.
|
|
244 | 211 |
|
245 | 212 |
Given a dockonsurf directory hierarchy: project/run_type/conf_X |
246 |
(run_type = ['isolated', 'screening' or 'refinement']) with finished |
|
247 |
calculations produced by a given code, stored in every conf_X subdirectory, |
|
248 |
it collects the energies of the specified conf_X subdirectories in a |
|
249 |
single run_type by calling the adequate function (depending on the code) and |
|
250 |
returns a list of energies. |
|
251 |
|
|
252 |
@param conf_list: List of directories where to read the energy. |
|
253 |
@param code: The code that produced the calculation output files. |
|
254 |
@param run_type: The type of calculation (and also the name of the folder) |
|
255 |
containing the calculation subdirectories. |
|
256 |
@return: List of energies |
|
213 |
(run_type = ['isolated', 'screening' or 'refinement']) it reads the |
|
214 |
coordinates of each conf_X, it assigns its total energy from the calculation |
|
215 |
and assigns the conf_X label to track its origin. Finally it returns the |
|
216 |
ase.Atoms object. |
|
217 |
|
|
218 |
@param dir_list: List of directories where to read the coords from. |
|
219 |
@param code: the code that produced the calculation results files. |
|
220 |
@param run_type: the type of calculation (and also the name of the folder) |
|
221 |
containing the calculation subdirectories. |
|
222 |
@param spec_atms: List of tuples containing pairs of new/traditional |
|
223 |
chemical symbols. |
|
224 |
@return: list of ase.Atoms objects. |
|
257 | 225 |
""" |
258 | 226 |
from glob import glob |
259 |
import numpy as np |
|
260 |
|
|
261 |
energies = [] |
|
262 |
for conf in conf_list: |
|
227 |
import os |
|
228 |
from modules.utilities import is_binary |
|
229 |
atoms_list = [] |
|
230 |
for conf_dir in dir_list: |
|
231 |
conf_path = f"{run_type}/{conf_dir}/" |
|
263 | 232 |
if code == 'cp2k': |
264 |
energies.append(read_energy_cp2k( |
|
265 |
glob(f"{run_type}/{conf}/*.out")[0])) |
|
233 |
ase_atms = read_coords_cp2k(glob(f"{conf_path}/*-1.restart")[0], |
|
234 |
spec_atms) |
|
235 |
# Assign energy |
|
236 |
for fil in os.listdir(conf_path): |
|
237 |
if is_binary(conf_path+fil): |
|
238 |
continue |
|
239 |
conf_energy = read_energy_cp2k(conf_path+fil) |
|
240 |
if conf_energy is not None: |
|
241 |
ase_atms.info["energy"] = conf_energy |
|
242 |
break |
|
243 |
ase_atms.info[run_type[:3]] = conf_dir |
|
244 |
atoms_list.append(ase_atms) |
|
245 |
elif code == 'vasp': |
|
246 |
ase_atms = read_coords_vasp(f"{conf_path}/OUTCAR", spec_atms) |
|
247 |
ase_atms.info["energy"] = ase_atms.get_total_energy() * 27.2113845 |
|
248 |
ase_atms.info[run_type[:3]] = conf_dir |
|
249 |
atoms_list.append(ase_atms) |
|
266 | 250 |
else: |
267 |
err_msg = f"Collect energies not implemented for '{code}'."
|
|
251 |
err_msg = f"Collect coordinates not implemented for '{code}'."
|
|
268 | 252 |
logger.error(err_msg) |
269 | 253 |
raise NotImplementedError(err_msg) |
254 |
return atoms_list |
|
270 | 255 |
|
271 |
if len(energies) == 0: |
|
272 |
err = f"No results found on {run_type}" |
|
273 |
logger.error(err) |
|
274 |
raise FileNotFoundError(err) |
|
275 |
|
|
276 |
return np.array(energies) |
b/modules/isolated.py | ||
---|---|---|
138 | 138 |
@return: |
139 | 139 |
""" |
140 | 140 |
from modules.formats import adapt_format, confs_to_mol_list, \ |
141 |
rdkit_mol_to_ase_atoms, collect_confs, collect_energies
|
|
141 |
rdkit_mol_to_ase_atoms, collect_confs |
|
142 | 142 |
from modules.clustering import clustering, get_rmsd |
143 | 143 |
from modules.calculation import run_calc, check_finished_calcs |
144 | 144 |
from modules.refinement import select_stable_confs |
... | ... | |
163 | 163 |
logger.error(err_msg) |
164 | 164 |
raise ValueError(err_msg) |
165 | 165 |
run_calc('isolated', inp_vars, ase_atms_list) |
166 |
finished_calcs, unfinished_calcs = check_finished_calcs('isolated',
|
|
166 |
finished_calcs, failed_calcs = check_finished_calcs('isolated',
|
|
167 | 167 |
inp_vars['code']) |
168 |
conf_list = collect_coords(finished_calcs, inp_vars['code'], 'isolated', |
|
169 |
inp_vars['special_atoms']) |
|
170 |
most_stable_conf = select_confs(conf_list, finished_calcs, 0, |
|
171 |
inp_vars['code'])[0] |
|
172 |
logger.info('Finished the procedures for the isolated molecule section. ' |
|
173 |
f'Most stable conformers is {most_stable_conf}.') |
|
168 |
conf_list = collect_confs(finished_calcs, inp_vars['code'], 'isolated', |
|
169 |
inp_vars['special_atoms']) |
|
170 |
most_stable_conf = select_stable_confs(conf_list, 0)[0] |
|
171 |
logger.info("Finished the procedures for the isolated molecule section. " |
|
172 |
f"Most stable conformers is {most_stable_conf.info['id']}, " |
|
173 |
f"with a total energy of {most_stable_conf.info['energy']} eV.") |
b/modules/refinement.py | ||
---|---|---|
3 | 3 |
logger = logging.getLogger('DockOnSurf') |
4 | 4 |
|
5 | 5 |
|
6 |
def select_confs(conf_list, calc_dirs, energy_cutoff, code):
|
|
6 |
def select_stable_confs(conf_list, energy_cutoff):
|
|
7 | 7 |
"""From a list of atomic configurations selects the most stable ones. |
8 | 8 |
|
9 | 9 |
Given a list of ase.Atoms configurations and an energy cutoff, selects all |
... | ... | |
11 | 11 |
conformer plus the cutoff. |
12 | 12 |
|
13 | 13 |
@param conf_list: List of ase.Atoms objects of the conformers |
14 |
@param calc_dirs: List of the directories of the finished calculations |
|
15 |
@param energy_cutoff: The maximum energy above the most stable configuration |
|
16 |
@param code: the code used to carry out the screening of structures. |
|
17 |
@return: list of the the most stable configurations within the energy cutoff |
|
14 |
@param energy_cutoff: The maximum energy above the most stable |
|
15 |
configuration. |
|
16 |
@return: list of the the most stable configurations within the energy cutoff. |
|
18 | 17 |
""" |
19 |
from copy import deepcopy |
|
20 |
from modules.formats import collect_energies |
|
21 |
|
|
22 |
conf_enrgs = collect_energies(calc_dirs, code, 'screening') |
|
23 |
|
|
24 |
for i, conf in enumerate(conf_list): |
|
25 |
conf.info['energy'] = conf_enrgs[i] |
|
26 |
conf.info['id'] = calc_dirs[i] |
|
27 |
|
|
28 | 18 |
sorted_list = sorted(conf_list, key=lambda conf: conf.info['energy']) |
29 | 19 |
lowest_e = sorted_list[0].info['energy'] |
30 | 20 |
return [conf for conf in sorted_list |
... | ... | |
37 | 27 |
@param inp_vars: Calculation parameters from input file. |
38 | 28 |
""" |
39 | 29 |
import os |
40 |
from modules.formats import collect_coords
|
|
30 |
from modules.formats import collect_confs
|
|
41 | 31 |
from modules.calculation import run_calc, check_finished_calcs |
42 | 32 |
|
43 | 33 |
logger.info('Carrying out procedures for the refinement of ' |
... | ... | |
49 | 39 |
logger.error(err) |
50 | 40 |
raise FileNotFoundError(err) |
51 | 41 |
|
52 |
finished_calcs, unfinished_calcs = check_finished_calcs('screening',
|
|
53 |
inp_vars['code'])
|
|
42 |
finished_calcs, failed_calcs = check_finished_calcs('screening',
|
|
43 |
inp_vars['code']) |
|
54 | 44 |
if not finished_calcs: |
55 | 45 |
err_msg = "No calculations on 'screening' finished normally." |
56 | 46 |
logger.error(err_msg) |
... | ... | |
58 | 48 |
logger.info(f"Found {len(finished_calcs)} structures of " |
59 | 49 |
f"adsorbate-surface atomic configurations whose calculation" |
60 | 50 |
f" finished normally.") |
61 |
if len(unfinished_calcs) != 0:
|
|
62 |
logger.warning(f"Found {len(unfinished_calcs)} calculations more that "
|
|
63 |
f"did not finish normally: {unfinished_calcs}. \n"
|
|
51 |
if len(failed_calcs) != 0:
|
|
52 |
logger.warning(f"Found {len(failed_calcs)} calculations more that "
|
|
53 |
f"did not finish normally: {failed_calcs}. \n"
|
|
64 | 54 |
f"Using only the ones that finished normally: " |
65 | 55 |
f"{finished_calcs}.") |
66 | 56 |
|
67 |
conf_list = collect_coords(finished_calcs, inp_vars['code'], 'screening', |
|
68 |
inp_vars['special_atoms']) |
|
69 |
selected_confs = select_confs(conf_list, finished_calcs, |
|
70 |
inp_vars['energy_cutoff'], inp_vars['code']) |
|
57 |
conf_list = collect_confs(finished_calcs, inp_vars['code'], 'screening', |
|
58 |
inp_vars['special_atoms']) |
|
59 |
selected_confs = select_stable_confs(conf_list, inp_vars['energy_cutoff']) |
|
71 | 60 |
logger.info(f"Selected {len(selected_confs)} structures to carry out the" |
72 | 61 |
f" refinement") |
73 | 62 |
run_calc('refinement', inp_vars, selected_confs) |
74 |
finished_calcs, unfinished_calcs = check_finished_calcs('refinement', |
|
75 |
inp_vars['code']) |
|
76 |
conf_list = collect_coords(finished_calcs, inp_vars['code'], 'refinement', |
|
77 |
inp_vars['special_atoms']) |
|
78 |
most_stable_conf = select_confs(conf_list, finished_calcs, 0, |
|
79 |
inp_vars['code'])[0] |
|
80 |
logger.info('Finished the procedures for the refinement of ' |
|
81 |
'adsorbate-surface structures section. Most stable structure ' |
|
82 |
f"is {most_stable_conf.info['id']}") |
|
63 |
finished_calcs, failed_calcs = check_finished_calcs('refinement', |
|
64 |
inp_vars['code']) |
|
65 |
conf_list = collect_confs(finished_calcs, inp_vars['code'], 'refinement', |
|
66 |
inp_vars['special_atoms']) |
|
67 |
most_stable_conf = select_stable_confs(conf_list, 0)[0] |
|
68 |
logger.info("Finished the procedures for the refinement of " |
|
69 |
"adsorbate-surface structures section. Most stable structure " |
|
70 |
f"is {most_stable_conf.info['id']} with a Total energy of " |
|
71 |
f"{most_stable_conf.info['energy']} eV.") |
b/modules/screening.py | ||
---|---|---|
5 | 5 |
logger = logging.getLogger('DockOnSurf') |
6 | 6 |
|
7 | 7 |
|
8 |
def assign_prop(atoms: ase.Atoms, prop_name: str, prop_val): # TODO Needed? |
|
9 |
atoms.info[prop_name] = prop_val |
|
10 |
|
|
11 |
|
|
12 |
def select_confs(orig_conf_list: list, calc_dirs: list, magns: list, |
|
13 |
num_sel: int, code: str): |
|
8 |
def select_confs(conf_list: list, magns: list, num_sel: int): |
|
14 | 9 |
"""Takes a list ase.Atoms and selects the most different magnitude-wise. |
15 | 10 |
|
16 | 11 |
Given a list of ase.Atoms objects and a list of magnitudes, it selects a |
17 | 12 |
number of the most different conformers according to every magnitude |
18 | 13 |
specified. |
19 | 14 |
|
20 |
@param orig_conf_list: list of ase.Atoms objects to select among. |
|
21 |
@param calc_dirs: List of directories where to read the energies from. |
|
15 |
@param conf_list: list of ase.Atoms objects to select among. |
|
22 | 16 |
@param magns: list of str with the names of the magnitudes to use for the |
23 | 17 |
conformer selection. Supported magnitudes: 'energy', 'moi'. |
24 | 18 |
@param num_sel: number of conformers to select for every of the magnitudes. |
25 |
@param code: The code that generated the magnitude information. |
|
26 |
Supported codes: See formats.py |
|
27 | 19 |
@return: list of the selected ase.Atoms objects. |
28 | 20 |
""" |
29 |
from copy import deepcopy |
|
30 |
from modules.formats import collect_energies |
|
31 |
|
|
32 |
conf_list = deepcopy(orig_conf_list) |
|
33 |
conf_enrgs, mois, selected_ids = [], [], [] |
|
21 |
selected_ids = [] |
|
34 | 22 |
if num_sel >= len(conf_list): |
35 | 23 |
logger.warning('Number of conformers per magnitude is equal or larger ' |
36 | 24 |
'than the total number of conformers. Using all ' |
37 | 25 |
f'available conformers: {len(conf_list)}.') |
38 | 26 |
return conf_list |
39 | 27 |
|
40 |
# Read properties |
|
41 |
if 'energy' in magns: |
|
42 |
if code == 'cp2k': |
|
43 |
conf_enrgs = collect_energies(calc_dirs, code, 'isolated') |
|
44 |
elif code == 'vasp': |
|
45 |
conf_enrgs = np.array([conf.get_total_energy() |
|
46 |
for conf in orig_conf_list]) |
|
28 |
# Assign mois |
|
47 | 29 |
if 'moi' in magns: |
48 |
mois = np.array([conf.get_moments_of_inertia() for conf in conf_list]) |
|
49 |
|
|
50 |
# Assign values |
|
51 |
for i, conf in enumerate(conf_list): |
|
52 |
assign_prop(conf, 'idx', i) |
|
53 |
assign_prop(conf, 'iso', calc_dirs[i]) |
|
54 |
if 'energy' in magns: |
|
55 |
assign_prop(conf, 'energy', conf_enrgs[i]) |
|
56 |
if 'moi' in magns: |
|
57 |
assign_prop(conf, 'moi', mois[i, 2]) |
|
30 |
for conf in conf_list: |
|
31 |
conf.info["moi"] = conf.get_moments_of_inertia()[2] |
|
58 | 32 |
|
59 | 33 |
# pick ids |
60 | 34 |
for magn in magns: |
61 | 35 |
sorted_list = sorted(conf_list, key=lambda conf: abs(conf.info[magn])) |
62 |
if sorted_list[-1].info['idx'] not in selected_ids:
|
|
63 |
selected_ids.append(sorted_list[-1].info['idx'])
|
|
36 |
if sorted_list[-1].info['iso'] not in selected_ids:
|
|
37 |
selected_ids.append(sorted_list[-1].info['iso'])
|
|
64 | 38 |
if num_sel > 1: |
65 | 39 |
for i in range(0, len(sorted_list) - 1, |
66 | 40 |
len(conf_list) // (num_sel - 1)): |
67 |
if sorted_list[i].info['idx'] not in selected_ids:
|
|
68 |
selected_ids.append(sorted_list[i].info['idx'])
|
|
41 |
if sorted_list[i].info['iso'] not in selected_ids:
|
|
42 |
selected_ids.append(sorted_list[i].info['iso'])
|
|
69 | 43 |
|
70 | 44 |
logger.info(f'Selected {len(selected_ids)} conformers for adsorption.') |
71 |
return [conf_list[idx] for idx in selected_ids]
|
|
45 |
return [conf for conf in conf_list if conf.info["iso"] in selected_ids]
|
|
72 | 46 |
|
73 | 47 |
|
74 | 48 |
def get_vect_angle(v1: list, v2: list, ref=None, degrees=True): |
... | ... | |
1026 | 1000 |
""" |
1027 | 1001 |
import os |
1028 | 1002 |
import random |
1029 |
from modules.formats import collect_coords, adapt_format
|
|
1003 |
from modules.formats import collect_confs, adapt_format
|
|
1030 | 1004 |
from modules.calculation import run_calc, check_finished_calcs |
1031 | 1005 |
|
1032 | 1006 |
logger.info('Carrying out procedures for the screening of adsorbate-surface' |
... | ... | |
1042 | 1016 |
logger.error(err) |
1043 | 1017 |
raise FileNotFoundError(err) |
1044 | 1018 |
|
1045 |
correct_calcs, failed_calcs = check_finished_calcs('isolated',
|
|
1046 |
inp_vars['code']) |
|
1047 |
if not correct_calcs:
|
|
1019 |
finished_calcs, failed_calcs = check_finished_calcs('isolated',
|
|
1020 |
inp_vars['code'])
|
|
1021 |
if not finished_calcs:
|
|
1048 | 1022 |
err_msg = "No calculations on 'isolated' finished normally." |
1049 | 1023 |
logger.error(err_msg) |
1050 | 1024 |
raise FileNotFoundError(err_msg) |
1051 | 1025 |
|
1052 |
logger.info(f"Found {len(correct_calcs)} structures of isolated "
|
|
1026 |
logger.info(f"Found {len(finished_calcs)} structures of isolated "
|
|
1053 | 1027 |
f"conformers whose calculation finished normally.") |
1054 | 1028 |
if len(failed_calcs) != 0: |
1055 | 1029 |
logger.warning( |
1056 | 1030 |
f"Found {len(failed_calcs)} calculations more that " |
1057 | 1031 |
f"did not finish normally: {failed_calcs}. \n" |
1058 | 1032 |
f"Using only the ones that finished normally: " |
1059 |
f"{correct_calcs}.") |
|
1060 |
|
|
1061 |
conformer_atoms_list = collect_coords(correct_calcs, inp_vars['code'], |
|
1062 |
'isolated', |
|
1063 |
inp_vars['special_atoms']) |
|
1064 |
selected_confs = select_confs(conformer_atoms_list, correct_calcs, |
|
1065 |
inp_vars['select_magns'], |
|
1066 |
inp_vars['confs_per_magn'], |
|
1067 |
inp_vars['code']) |
|
1033 |
f"{finished_calcs}.") |
|
1034 |
|
|
1035 |
conf_list = collect_confs(finished_calcs, inp_vars['code'], 'isolated', |
|
1036 |
inp_vars['special_atoms']) |
|
1037 |
selected_confs = select_confs(conf_list, inp_vars['select_magns'], |
|
1038 |
inp_vars['confs_per_magn']) |
|
1068 | 1039 |
surf = adapt_format('ase', inp_vars['surf_file'], inp_vars['special_atoms']) |
1069 | 1040 |
surf.info = {} |
1070 | 1041 |
surf_ads_list = adsorb_confs(selected_confs, surf, inp_vars) |
Formats disponibles : Unified diff