Révision f3004731
b/modules/calculation.py | ||
---|---|---|
1 | 1 |
import os |
2 | 2 |
import logging |
3 | 3 |
|
4 |
from rdkit.Chem import AllChem as Chem |
|
5 |
|
|
6 | 4 |
logger = logging.getLogger('DockOnSurf') |
7 | 5 |
|
8 | 6 |
|
9 |
def prep_iso(inp_file, mol: Chem.rdchem.Mol, exemplars): |
|
10 |
"""Prepares the directories to run the calculations |
|
11 |
|
|
12 |
@param inp_file: Input file of the code to run the calculations with. |
|
13 |
@param mol: rdkit mol object of the relevant molecule. |
|
14 |
@param exemplars: list of conformer indices selected as representative. |
|
15 |
@return: None |
|
7 |
def create_bak_calc_dir(run_type): |
|
8 |
"""Checks if calculations directory already exists, backs it up if so and |
|
9 |
creates an empty one. |
|
10 |
@param run_type: Type of calculation. 'isolated', 'screening' or |
|
11 |
'refinement' |
|
16 | 12 |
""" |
17 |
from shutil import copy |
|
18 |
|
|
19 |
# Checking if 'isolated' directory already exists and backing it up if so. |
|
20 |
dir_name = 'isolated' |
|
13 |
dir_name = run_type |
|
21 | 14 |
bak_num = 0 |
22 | 15 |
while dir_name in os.listdir("."): |
23 | 16 |
bak_num += 1 |
24 | 17 |
dir_name = dir_name.split(".")[0] + f".bak{bak_num}" |
25 | 18 |
if bak_num > 0: |
26 |
os.rename('isolated', dir_name)
|
|
27 |
logger.warning("'isolated' directory already present. Moved former "
|
|
19 |
os.rename(run_type, dir_name)
|
|
20 |
logger.warning(f"'{run_type}' directory already present. Moved former "
|
|
28 | 21 |
f"directory to {dir_name}") |
29 |
os.mkdir('isolated')
|
|
22 |
os.mkdir(run_type)
|
|
30 | 23 |
|
31 |
# Creating and setting up directories for every conformer calculation. |
|
32 |
for i, conf in enumerate(exemplars): |
|
33 |
os.mkdir(f'isolated/conf_{i}') |
|
34 |
copy(inp_file, f'isolated/conf_{i}/') |
|
35 |
Chem.MolToXYZFile(mol, f'isolated/conf_{i}/coord.xyz', confId=int(conf)) |
|
36 | 24 |
|
25 |
def prep_cp2k(inp_file, run_type, atms_list): |
|
26 |
"""Prepares the directories to run isolated calculation with CP2K. |
|
37 | 27 |
|
38 |
def run_calc(run_type, inp_vars, **kwargs): |
|
28 |
@param inp_file: CP2K Input file to run the calculations with. |
|
29 |
@param run_type: Type of calculation. 'isolated', 'screening' or |
|
30 |
'refinement' |
|
31 |
@param atms_list: list of ase.Atoms objects to run the calculation of. |
|
32 |
@return: None |
|
33 |
""" |
|
34 |
from shutil import copy |
|
35 |
import ase.io |
|
36 |
from pycp2k import CP2K |
|
37 |
cp2k = CP2K() |
|
38 |
cp2k.parse(inp_file) |
|
39 |
force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0] |
|
40 |
coord_file = force_eval.SUBSYS.TOPOLOGY.Coord_file_name |
|
41 |
|
|
42 |
# Creating and setting up directories for every atoms configuration. |
|
43 |
for i, conf in enumerate(atms_list): |
|
44 |
os.mkdir(f'{run_type}/conf_{i}') |
|
45 |
copy(inp_file, f'{run_type}/conf_{i}/') |
|
46 |
ase.io.write(f'{run_type}/conf_{i}/{coord_file}', conf) |
|
47 |
|
|
48 |
|
|
49 |
def run_calc(run_type, inp_vars, atms_list): |
|
39 | 50 |
"""Directs the calculation run according to the provided arguments. |
40 | 51 |
|
41 |
@param run_type: Type of calculation. Isolated, screening or refinement |
|
52 |
@param run_type: Type of calculation. 'isolated', 'screening' or |
|
53 |
'refinement' |
|
42 | 54 |
@param inp_vars: Calculation parameters from input file. |
43 |
@type kwargs: keyword arguments relevant to the specified run_type: |
|
44 |
for isolated: |
|
45 |
confs: rdkit mol object of the relevant molecule containing the |
|
46 |
conformers |
|
47 |
exemplars: list of conformer indices that are exemplars for every |
|
48 |
cluster. |
|
49 |
for screening: |
|
55 |
@param atms_list: List of ase.Atoms objects containing the sets of atoms |
|
56 |
aimed to run the calculations of. |
|
50 | 57 |
""" |
51 | 58 |
run_types = ['isolated', 'screening', 'refinement'] |
52 | 59 |
run_type_err = f"'run_type' should be one of the following: {run_types}" |
... | ... | |
56 | 63 |
|
57 | 64 |
logger.info(f"Running {run_type} calculation with {inp_vars['code']} on " |
58 | 65 |
f"{inp_vars['batch_q_sys']}") |
59 |
|
|
66 |
create_bak_calc_dir(run_type) |
|
60 | 67 |
if run_type == 'isolated': |
61 |
key_err = "If 'run_type' is 'isolated', 'run_calc' needs the following"\ |
|
62 |
"arguments: 'inp_file', 'confs' and 'exemplars'." |
|
63 |
err = False |
|
64 |
try: |
|
65 |
confs = kwargs['confs'] |
|
66 |
exemplars = kwargs['exemplars'] |
|
67 |
except KeyError as e: |
|
68 |
logger.error(key_err) |
|
69 |
err = e |
|
70 |
else: |
|
71 |
err = False |
|
72 |
finally: |
|
73 |
if isinstance(err, BaseException): |
|
74 |
raise err |
|
75 |
|
|
76 |
prep_iso(inp_vars['isol_inp_file'], confs, exemplars) |
|
68 |
if inp_vars['code'] == 'cp2k': |
|
69 |
prep_cp2k(inp_vars['isol_inp_file'], run_type, atms_list) |
b/modules/formats.py | ||
---|---|---|
1 | 1 |
"""Module for the conversion of coordinate files to library-workable objects |
2 | 2 |
|
3 | 3 |
functions: |
4 |
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list |
|
5 |
of separate mol objects. |
|
6 |
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object. |
|
4 | 7 |
adapt_format: Converts the coordinate files into a required library object type |
5 | 8 |
""" |
6 | 9 |
|
7 | 10 |
import logging |
11 |
|
|
12 |
import rdkit.Chem.AllChem as Chem |
|
13 |
|
|
8 | 14 |
logger = logging.getLogger('DockOnSurf') |
9 | 15 |
|
10 | 16 |
|
17 |
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None): |
|
18 |
"""Converts the conformers inside a rdkit mol object to a list of |
|
19 |
separate mol objects. |
|
20 |
|
|
21 |
@param mol: rdkit mol object containing at least one conformer. |
|
22 |
@param idx_lst: |
|
23 |
@return: list of separate mol objects. |
|
24 |
""" |
|
25 |
if idx_lst is None: |
|
26 |
idx_lst = list(range(mol.GetNumConformers())) |
|
27 |
return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)), |
|
28 |
removeHs=False) for idx in idx_lst] |
|
29 |
|
|
30 |
|
|
31 |
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol): |
|
32 |
"""Converts a rdkit mol object into ase Atoms object. |
|
33 |
@param mol: rdkit mol object containing only one conformer. |
|
34 |
@return ase.Atoms: ase Atoms object with the same coordinates. |
|
35 |
""" |
|
36 |
from ase import Atoms |
|
37 |
symbols = [atm.GetSymbol() for atm in mol.GetAtoms()] |
|
38 |
positions = mol.GetConformer(0).GetPositions() |
|
39 |
return Atoms(symbols=symbols, positions=positions) |
|
40 |
|
|
41 |
|
|
11 | 42 |
def adapt_format(requirement, coord_file): |
12 | 43 |
"""Converts the coordinate files into a required library object type. |
13 | 44 |
|
... | ... | |
27 | 58 |
f" has not yet been implemented" |
28 | 59 |
conv_info = f"Converted {coord_file} to {requirement} object type" |
29 | 60 |
|
30 |
fil_type_err = f'The {filetype( coord_file )} file formnat is not supported'
|
|
61 |
fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
|
|
31 | 62 |
|
32 | 63 |
if requirement not in req_vals: |
33 | 64 |
logger.error(lib_err) |
b/modules/isolated.py | ||
---|---|---|
20 | 20 |
logger = logging.getLogger('DockOnSurf') |
21 | 21 |
|
22 | 22 |
|
23 |
def confs_to_mol_list(mol: Chem.rdchem.Mol): |
|
24 |
"""Converts the conformers inside a rdkit mol object to a list of |
|
25 |
separate mol objects. |
|
26 |
|
|
27 |
@param mol: rdkit mol object containing at least one conformer. |
|
28 |
@return: list of separate mol objects. |
|
29 |
""" |
|
30 |
return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId())) |
|
31 |
for conf in mol.GetConformers()] |
|
32 |
|
|
33 |
|
|
34 | 23 |
def remove_C_linked_Hs(mol: Chem.rdchem.Mol): |
35 | 24 |
"""Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object. |
36 | 25 |
|
... | ... | |
180 | 169 |
@param inp_vars: Calculation parameters from input file. |
181 | 170 |
@return: |
182 | 171 |
""" |
183 |
from modules.formats import adapt_format |
|
172 |
from modules.formats import adapt_format, confs_to_mol_list, \ |
|
173 |
rdkit_mol_to_ase_atoms |
|
184 | 174 |
from modules.clustering import clustering |
185 | 175 |
from modules.calculation import run_calc |
186 | 176 |
|
... | ... | |
194 | 184 |
rmsd_mtx = get_rmsd(confs) |
195 | 185 |
confs_moi = get_moments_of_inertia(confs) |
196 | 186 |
exemplars = clustering(rmsd_mtx) |
197 |
run_calc('isolated', inp_vars, confs=confs, exemplars=exemplars) |
|
187 |
mol_list = confs_to_mol_list(confs, exemplars) |
|
188 |
ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list] |
|
189 |
run_calc('isolated', inp_vars, ase_atms_list) |
|
198 | 190 |
|
199 | 191 |
if 'moi' in inp_vars['cluster_magns']: |
200 | 192 |
confs_moi = get_moments_of_inertia(confs) |
b/requirements.txt | ||
---|---|---|
1 |
rdkit~=2019.09.1.0
|
|
2 |
ase~=3.19.0
|
|
1 |
rdkit~=2020.3.2
|
|
2 |
ase~=3.19.1
|
|
3 | 3 |
numpy~=1.16.6 |
4 | 4 |
networkx~=2.4 |
5 | 5 |
hdbscan~=0.8.26 |
6 |
matplotlib~=3.1.3 |
|
7 |
scikit-learn~=0.22.1 |
|
6 |
matplotlib~=3.2.1 |
|
7 |
scikit-learn~=0.22.2.post1 |
|
8 |
pycp2k |
Formats disponibles : Unified diff