Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ f3004731

Historique | Voir | Annoter | Télécharger (7,78 ko)

1 4614bb6a Carles
"""Functions to generate the conformers to be adsorbed and the most stable one.
2 4614bb6a Carles

3 5c70d6c6 Carles
functions:
4 92924d24 Carles
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5 92924d24 Carles
    of independent mol objects.
6 5c70d6c6 Carles
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
7 92924d24 Carles
gen_confs: Generate a number of conformers in random orientations.
8 92924d24 Carles
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
9 92924d24 Carles
get_moments_of_inertia: Computes moments of inertia of the given conformers.
10 92924d24 Carles
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
11 92924d24 Carles
    new mol object and the energies of its conformers.
12 4614bb6a Carles
run_isolated: directs the execution of functions to achieve the goal
13 4614bb6a Carles
"""
14 fd33bfdb Carles
import os
15 4614bb6a Carles
import logging
16 4614bb6a Carles
17 92924d24 Carles
import numpy as np
18 92924d24 Carles
from rdkit.Chem import AllChem as Chem
19 92924d24 Carles
20 4614bb6a Carles
logger = logging.getLogger('DockOnSurf')
21 4614bb6a Carles
22 4614bb6a Carles
23 92924d24 Carles
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
24 b1f6e69d Carles
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
25 5c70d6c6 Carles

26 92924d24 Carles
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
27 92924d24 Carles
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
28 92924d24 Carles
    a carbon atom.
29 92924d24 Carles

30 92924d24 Carles
    The functions removes the hydrogen atoms bonded to carbon atoms while
31 92924d24 Carles
    keeping the ones bonded to other atoms or non-bonded at all.
32 5c70d6c6 Carles
    """
33 5c70d6c6 Carles
34 5c70d6c6 Carles
    mol = Chem.RWMol(mol)
35 5c70d6c6 Carles
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
36 5c70d6c6 Carles
37 5c70d6c6 Carles
    for atm_idx in rev_atm_idxs:
38 5c70d6c6 Carles
        atom = mol.GetAtomWithIdx(atm_idx)
39 5c70d6c6 Carles
        if atom.GetAtomicNum() != 1:
40 5c70d6c6 Carles
            continue
41 5c70d6c6 Carles
        for neigh in atom.GetNeighbors():
42 5c70d6c6 Carles
            if neigh.GetAtomicNum() == 6:
43 5c70d6c6 Carles
                mol.RemoveAtom(atom.GetIdx())
44 5c70d6c6 Carles
    return mol
45 5c70d6c6 Carles
46 5c70d6c6 Carles
47 92924d24 Carles
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
48 92924d24 Carles
    """Generate conformers in random orientations.
49 4614bb6a Carles

50 4614bb6a Carles
    @param mol: rdkit mol object of the molecule to be adsorbed.
51 92924d24 Carles
    @param num_confs: number of conformers to randomly generate.
52 b1f6e69d Carles
    @param local_min: bool: if generated conformers should be a local minimum.
53 4614bb6a Carles
    @return: mol: rdkit mol object containing the different conformers.
54 92924d24 Carles
             rmsd_mtx: triangular matrix with the rmsd values of conformers.
55 b1f6e69d Carles

56 92924d24 Carles
    Using the rdkit library, conformers are randomly generated. If structures 
57 92924d24 Carles
    are required to be local minima, ie. setting the 'local_min' value to 
58 92924d24 Carles
    True, a geometry optimisation using UFF is performed.
59 4614bb6a Carles
    """
60 92924d24 Carles
    logger.debug('Generating Conformers')
61 9e83b5a1 Carles
62 fab7b517 Carles
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
63 5c70d6c6 Carles
    Chem.AlignMolConformers(mol)
64 92924d24 Carles
    logger.info(f'Generated {len(mol.GetConformers())} conformers')
65 92924d24 Carles
    return mol
66 92924d24 Carles
67 62576f52 Carles
68 92924d24 Carles
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"):
69 92924d24 Carles
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
70 92924d24 Carles

71 92924d24 Carles
    @param mol: rdkit mol object containing at least two conformers.
72 92924d24 Carles
    @param remove_Hs: bool or str,
73 92924d24 Carles
    @return rmsd_matrix: Matrix containing the rmsd values of every pair of
74 92924d24 Carles
    conformers.
75 92924d24 Carles

76 92924d24 Carles
    The RMSD values of every pair of conformers is computed, stored in matrix
77 92924d24 Carles
    form and returned back. The calculation of rmsd values can take into
78 92924d24 Carles
    account all hydrogens, none, or only the ones not linked to carbon atoms.
79 92924d24 Carles
    """
80 92924d24 Carles
    if mol.GetNumConformers() < 2:
81 92924d24 Carles
        err = "The provided molecule has less than 2 conformers"
82 92924d24 Carles
        logger.error(err)
83 92924d24 Carles
        raise ValueError(err)
84 92924d24 Carles
85 92924d24 Carles
    if not remove_Hs:
86 92924d24 Carles
        pass
87 92924d24 Carles
    elif remove_Hs or remove_Hs.lower() == "all":
88 92924d24 Carles
        mol = Chem.RemoveHs(mol)
89 92924d24 Carles
    elif remove_Hs.lower() == "c":
90 92924d24 Carles
        mol = remove_C_linked_Hs(mol)
91 92924d24 Carles
    else:
92 54be2a9e Carles
        err = "remove_Hs value does not have an acceptable value"
93 54be2a9e Carles
        logger.error(err)
94 54be2a9e Carles
        raise ValueError(err)
95 92924d24 Carles
96 92924d24 Carles
    num_confs = mol.GetNumConformers()
97 92924d24 Carles
    conf_ids = list(range(num_confs))
98 92924d24 Carles
    rmsd_mtx = np.zeros((num_confs, num_confs))
99 5c70d6c6 Carles
    for conf1 in conf_ids:
100 5c70d6c6 Carles
        for conf2 in conf_ids[conf1 + 1:]:
101 92924d24 Carles
            rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1)
102 92924d24 Carles
            rmsd_mtx[conf1][conf2] = rmsd
103 92924d24 Carles
            rmsd_mtx[conf2][conf1] = rmsd
104 92924d24 Carles
105 92924d24 Carles
    return rmsd_mtx
106 4614bb6a Carles
107 92924d24 Carles
108 92924d24 Carles
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
109 92924d24 Carles
    """Computes the moments of inertia of the given conformers
110 92924d24 Carles

111 92924d24 Carles
    @param mol: rdkit mol object of the relevant molecule.
112 92924d24 Carles
    @return numpy array 2D: The inner array contains the moments of inertia for
113 92924d24 Carles
    the three principal axis of a given conformer. They are ordered by its value
114 92924d24 Carles
    in ascending order. The outer tuple loops over the conformers.
115 92924d24 Carles
    """
116 92924d24 Carles
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
117 92924d24 Carles
118 92924d24 Carles
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
119 92924d24 Carles
                     for conf in range(mol.GetNumConformers())])
120 92924d24 Carles
121 92924d24 Carles
122 62576f52 Carles
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000):
123 92924d24 Carles
    """Optimizes the geometry of the given conformers and returns the new mol
124 92924d24 Carles
    object and the energies of its conformers.
125 92924d24 Carles

126 92924d24 Carles
    @param mol: rdkit mol object of the relevant molecule.
127 62576f52 Carles
    @param max_iters: Maximum number of geometry optimization iterations. With 0
128 62576f52 Carles
    a single point energy calculation is performed and only the conformer
129 62576f52 Carles
    energies are returned.
130 92924d24 Carles
    @return mol: rdkit mol object of the optimized molecule.
131 92924d24 Carles
    @return numpy.ndarray: Array with the energies of the optimized conformers.
132 92924d24 Carles

133 92924d24 Carles
    The MMFF forcefield is used for the geometry optimization in its rdkit
134 62576f52 Carles
    implementation. With max_iters value set to 0, a single point energy
135 62576f52 Carles
    calculation is performed and only the energies are returned. For values
136 62576f52 Carles
    larger than 0, if the geometry does not converge for a certain conformer,
137 92924d24 Carles
    the latter is removed from the list of conformers and its energy is not
138 92924d24 Carles
    included in the returned list.
139 92924d24 Carles
    """
140 92924d24 Carles
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
141 92924d24 Carles
142 92924d24 Carles
    init_num_confs = mol.GetNumConformers()
143 92924d24 Carles
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
144 62576f52 Carles
                                                 maxIters=max_iters,
145 62576f52 Carles
                                                 nonBondedThresh=10))
146 62576f52 Carles
147 62576f52 Carles
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
148 62576f52 Carles
    # return all conformers if optimization is switched off, ie. maxIters = 0
149 62576f52 Carles
    if max_iters > 0:
150 62576f52 Carles
        for i, conv in enumerate(results[:, 0]):
151 62576f52 Carles
            if conv != 0:
152 62576f52 Carles
                mol.RemoveConformer(i)
153 62576f52 Carles
        for i, conf in enumerate(mol.GetConformers()):
154 62576f52 Carles
            conf.SetId(i)
155 62576f52 Carles
        if mol.GetNumConformers() < init_num_confs:
156 62576f52 Carles
            logger.warning(f'MMFF Geometry optimization did not comverge for at'
157 62576f52 Carles
                           f'least one conformer. Continuing with '
158 62576f52 Carles
                           f'{mol.GetNumConformers()} converged conformers')
159 62576f52 Carles
        logger.info(f'Optimized conformers with MMFF.')
160 62576f52 Carles
        return mol, np.array([res[1] for res in results if res[0] == 0])
161 62576f52 Carles
    else:
162 62576f52 Carles
        logger.info(f'Computed conformers energy with MMFF.')
163 62576f52 Carles
        return np.array([res[1] for res in results])
164 4614bb6a Carles
165 4614bb6a Carles
166 4614bb6a Carles
def run_isolated(inp_vars):
167 4614bb6a Carles
    """Directs the execution of functions to obtain the conformers to adsorb
168 4614bb6a Carles

169 a9b058a5 Carles
    @param inp_vars: Calculation parameters from input file.
170 4614bb6a Carles
    @return:
171 4614bb6a Carles
    """
172 f3004731 Carles
    from modules.formats import adapt_format, confs_to_mol_list, \
173 f3004731 Carles
        rdkit_mol_to_ase_atoms
174 15be7594 Carles
    from modules.clustering import clustering
175 3d6a9d3c Carles
    from modules.calculation import run_calc
176 54be2a9e Carles
177 4614bb6a Carles
    logger.info('Carrying out procedures for the isolated molecule')
178 4614bb6a Carles
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
179 92924d24 Carles
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
180 fd33bfdb Carles
    if inp_vars['min_confs']:
181 fd33bfdb Carles
        confs, confs_ener = mmff_opt_confs(confs)
182 fab7b517 Carles
    else:
183 fab7b517 Carles
        confs_ener = mmff_opt_confs(confs, max_iters=0)
184 34a03ee1 Carles
    rmsd_mtx = get_rmsd(confs)
185 fab7b517 Carles
    confs_moi = get_moments_of_inertia(confs)
186 fd33bfdb Carles
    exemplars = clustering(rmsd_mtx)
187 f3004731 Carles
    mol_list = confs_to_mol_list(confs, exemplars)
188 f3004731 Carles
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
189 f3004731 Carles
    run_calc('isolated', inp_vars, ase_atms_list)
190 92924d24 Carles
191 34a03ee1 Carles
    if 'moi' in inp_vars['cluster_magns']:
192 34a03ee1 Carles
        confs_moi = get_moments_of_inertia(confs)
193 92924d24 Carles
194 fd33bfdb Carles
    # if 'energy' in inp_vars['cluster_magns']: