dockonsurf / modules / isolated.py @ 19567be2
Historique | Voir | Annoter | Télécharger (7,75 ko)
1 |
"""Functions to generate the conformers to be adsorbed and the most stable one.
|
---|---|
2 |
|
3 |
functions:
|
4 |
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
|
5 |
gen_confs: Generate a number of conformers in random orientations.
|
6 |
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
|
7 |
get_moments_of_inertia: Computes moments of inertia of the given conformers.
|
8 |
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
|
9 |
new mol object and the energies of its conformers.
|
10 |
run_isolated: directs the execution of functions to achieve the goal
|
11 |
"""
|
12 |
import logging |
13 |
|
14 |
import numpy as np |
15 |
import rdkit.Chem.AllChem as Chem |
16 |
|
17 |
logger = logging.getLogger('DockOnSurf')
|
18 |
|
19 |
|
20 |
def remove_C_linked_Hs(mol: Chem.rdchem.Mol): |
21 |
"""Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
|
22 |
|
23 |
@param mol: rdkit mol object of the molecule with hydrogen atoms.
|
24 |
@return: rdkit mol object of the molecule without hydrogen atoms linked to
|
25 |
a carbon atom.
|
26 |
|
27 |
The functions removes the hydrogen atoms bonded to carbon atoms while
|
28 |
keeping the ones bonded to other atoms or non-bonded at all.
|
29 |
"""
|
30 |
|
31 |
mol = Chem.RWMol(mol) |
32 |
rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())] |
33 |
|
34 |
for atm_idx in rev_atm_idxs: |
35 |
atom = mol.GetAtomWithIdx(atm_idx) |
36 |
if atom.GetAtomicNum() != 1: |
37 |
continue
|
38 |
for neigh in atom.GetNeighbors(): |
39 |
if neigh.GetAtomicNum() == 6: |
40 |
mol.RemoveAtom(atom.GetIdx()) |
41 |
return mol
|
42 |
|
43 |
|
44 |
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int): |
45 |
"""Generate conformers in random orientations.
|
46 |
|
47 |
@param mol: rdkit mol object of the molecule to be adsorbed.
|
48 |
@param num_confs: number of conformers to randomly generate.
|
49 |
@return: mol: rdkit mol object containing the different conformers.
|
50 |
rmsd_mtx: Matrix with the rmsd values of conformers.
|
51 |
|
52 |
Using the rdkit library, conformers are randomly generated. If structures
|
53 |
are required to be local minima, ie. setting the 'local_min' value to
|
54 |
True, a geometry optimisation using UFF is performed.
|
55 |
"""
|
56 |
logger.debug('Generating Conformers.')
|
57 |
|
58 |
mol = Chem.AddHs(mol) |
59 |
Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0,
|
60 |
randomSeed=5817216)
|
61 |
Chem.AlignMolConformers(mol) |
62 |
logger.info(f'Generated {len(mol.GetConformers())} conformers.')
|
63 |
return mol
|
64 |
|
65 |
|
66 |
def get_moments_of_inertia(mol: Chem.rdchem.Mol): |
67 |
"""Computes the moments of inertia of the given conformers
|
68 |
|
69 |
@param mol: rdkit mol object of the relevant molecule.
|
70 |
@return numpy array 2D: The inner array contains the moments of inertia for
|
71 |
the three principal axis of a given conformer. They are ordered by its value
|
72 |
in ascending order. The outer tuple loops over the conformers.
|
73 |
"""
|
74 |
from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3 |
75 |
|
76 |
return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)] |
77 |
for conf in range(mol.GetNumConformers())]) |
78 |
|
79 |
|
80 |
def pre_opt_confs(mol: Chem.rdchem.Mol, force_field='mmff', max_iters=2000): |
81 |
"""Optimizes the geometry of the given conformers and returns the new mol
|
82 |
object and the energies of its conformers.
|
83 |
|
84 |
@param mol: rdkit mol object of the relevant molecule.
|
85 |
@param force_field: Force Field to use for the pre-optimization.
|
86 |
@param max_iters: Maximum number of geometry optimization iterations. With 0
|
87 |
a single point energy calculation is performed and only the conformer
|
88 |
energies are returned.
|
89 |
@return mol: rdkit mol object of the optimized molecule.
|
90 |
@return numpy.ndarray: Array with the energies of the optimized conformers.
|
91 |
|
92 |
The MMFF and UFF force fields can be used for the geometry optimization in
|
93 |
their rdkit implementation. With max_iters value set to 0, a single point
|
94 |
energy calculation is performed and only the energies are returned. For
|
95 |
values larger than 0, if the geometry does not converge for a certain
|
96 |
conformer, the latter is removed from the list of conformers and its energy
|
97 |
is not included in the returned list.
|
98 |
"""
|
99 |
from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs, \ |
100 |
UFFOptimizeMoleculeConfs
|
101 |
|
102 |
init_num_confs = mol.GetNumConformers() |
103 |
if force_field == 'mmff': |
104 |
results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
|
105 |
maxIters=max_iters, |
106 |
nonBondedThresh=10))
|
107 |
elif force_field == 'uff': |
108 |
results = np.array(UFFOptimizeMoleculeConfs(mol, numThreads=0,
|
109 |
maxIters=max_iters, |
110 |
vdwThresh=10))
|
111 |
else:
|
112 |
logger.error("force_field parameter must be 'MMFF' or 'UFF'")
|
113 |
raise ValueError("force_field parameter must be 'MMFF' or 'UFF'") |
114 |
|
115 |
# Remove non-converged conformers if optimization is on, ie. maxIters > 0
|
116 |
# return all conformers if optimization is switched off, ie. maxIters = 0
|
117 |
if max_iters > 0: |
118 |
for i, conv in enumerate(results[:, 0]): |
119 |
if conv != 0: |
120 |
mol.RemoveConformer(i) |
121 |
for i, conf in enumerate(mol.GetConformers()): |
122 |
conf.SetId(i) |
123 |
if mol.GetNumConformers() < init_num_confs:
|
124 |
logger.warning(f'Geometry optimization did not comverge for at'
|
125 |
f'least one conformer. Continuing with '
|
126 |
f'{mol.GetNumConformers()} converged conformers.')
|
127 |
logger.info(f'Pre-optimized conformers with {force_field}.')
|
128 |
return mol, np.array([res[1] for res in results if res[0] == 0]) |
129 |
else:
|
130 |
logger.info(f'Computed conformers energy with {force_field}.')
|
131 |
return np.array([res[1] for res in results]) |
132 |
|
133 |
|
134 |
def run_isolated(inp_vars): |
135 |
"""Directs the execution of functions to obtain the conformers to adsorb
|
136 |
|
137 |
@param inp_vars: Calculation parameters from input file.
|
138 |
@return:
|
139 |
"""
|
140 |
from modules.formats import adapt_format, confs_to_mol_list, \ |
141 |
rdkit_mol_to_ase_atoms, collect_confs |
142 |
from modules.clustering import clustering, get_rmsd |
143 |
from modules.calculation import run_calc, check_finished_calcs |
144 |
from modules.refinement import select_stable_confs |
145 |
|
146 |
logger.info('Carrying out procedures for the isolated molecule.')
|
147 |
rd_mol = adapt_format('rdkit', inp_vars['molec_file'], |
148 |
inp_vars['special_atoms'])
|
149 |
confs = gen_confs(rd_mol, inp_vars['num_conformers'])
|
150 |
if inp_vars['pre_opt']: |
151 |
confs, confs_ener = pre_opt_confs(confs, inp_vars['pre_opt'])
|
152 |
else:
|
153 |
confs_ener = pre_opt_confs(confs, max_iters=0)
|
154 |
conf_list = confs_to_mol_list(confs) |
155 |
rmsd_mtx = get_rmsd(conf_list) |
156 |
confs_moi = get_moments_of_inertia(confs) |
157 |
exemplars = clustering(rmsd_mtx) |
158 |
mol_list = confs_to_mol_list(confs, exemplars) |
159 |
ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list] |
160 |
if len(ase_atms_list) == 0: |
161 |
err_msg = "No configurations were generated: Check the parameters in" \
|
162 |
"dockonsurf.inp"
|
163 |
logger.error(err_msg) |
164 |
raise ValueError(err_msg) |
165 |
run_calc('isolated', inp_vars, ase_atms_list)
|
166 |
logger.info("Finished the procedures for the isolated molecule section. ")
|
167 |
if inp_vars["batch_q_sys"]: |
168 |
finished_calcs, failed_calcs = check_finished_calcs('isolated',
|
169 |
inp_vars['code'])
|
170 |
conf_list = collect_confs(finished_calcs, inp_vars['code'], 'isolated', |
171 |
inp_vars['special_atoms'])
|
172 |
most_stable_conf = select_stable_confs(conf_list, 0)[0] |
173 |
|
174 |
logger.info(f"Most stable conformers is {most_stable_conf.info['iso']},"
|
175 |
f" with a total energy of {most_stable_conf.info['energy']}"
|
176 |
f" eV.")
|