Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ fd33bfdb

Historique | Voir | Annoter | Télécharger (8,69 ko)

1 4614bb6a Carles
"""Functions to generate the conformers to be adsorbed and the most stable one.
2 4614bb6a Carles

3 5c70d6c6 Carles
functions:
4 92924d24 Carles
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5 92924d24 Carles
    of independent mol objects.
6 5c70d6c6 Carles
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
7 92924d24 Carles
gen_confs: Generate a number of conformers in random orientations.
8 92924d24 Carles
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
9 92924d24 Carles
get_moments_of_inertia: Computes moments of inertia of the given conformers.
10 92924d24 Carles
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
11 92924d24 Carles
    new mol object and the energies of its conformers.
12 4614bb6a Carles
run_isolated: directs the execution of functions to achieve the goal
13 4614bb6a Carles
"""
14 fd33bfdb Carles
import os
15 4614bb6a Carles
import logging
16 4614bb6a Carles
17 92924d24 Carles
import numpy as np
18 92924d24 Carles
from rdkit.Chem import AllChem as Chem
19 92924d24 Carles
20 4614bb6a Carles
logger = logging.getLogger('DockOnSurf')
21 4614bb6a Carles
22 4614bb6a Carles
23 92924d24 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol):
24 92924d24 Carles
    """Converts the conformers inside a rdkit mol object to a list of
25 92924d24 Carles
    separate mol objects.
26 92924d24 Carles

27 92924d24 Carles
    @param mol: rdkit mol object containing at least one conformer.
28 92924d24 Carles
    @return list: list of separate mol objects.
29 92924d24 Carles
    """
30 92924d24 Carles
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId()))
31 92924d24 Carles
            for conf in mol.GetConformers()]
32 92924d24 Carles
33 92924d24 Carles
34 92924d24 Carles
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
35 b1f6e69d Carles
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
36 5c70d6c6 Carles

37 92924d24 Carles
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
38 92924d24 Carles
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
39 92924d24 Carles
    a carbon atom.
40 92924d24 Carles

41 92924d24 Carles
    The functions removes the hydrogen atoms bonded to carbon atoms while
42 92924d24 Carles
    keeping the ones bonded to other atoms or non-bonded at all.
43 5c70d6c6 Carles
    """
44 5c70d6c6 Carles
45 5c70d6c6 Carles
    mol = Chem.RWMol(mol)
46 5c70d6c6 Carles
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
47 5c70d6c6 Carles
48 5c70d6c6 Carles
    for atm_idx in rev_atm_idxs:
49 5c70d6c6 Carles
        atom = mol.GetAtomWithIdx(atm_idx)
50 5c70d6c6 Carles
        if atom.GetAtomicNum() != 1:
51 5c70d6c6 Carles
            continue
52 5c70d6c6 Carles
        for neigh in atom.GetNeighbors():
53 5c70d6c6 Carles
            if neigh.GetAtomicNum() == 6:
54 5c70d6c6 Carles
                mol.RemoveAtom(atom.GetIdx())
55 5c70d6c6 Carles
    return mol
56 5c70d6c6 Carles
57 5c70d6c6 Carles
58 92924d24 Carles
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
59 92924d24 Carles
    """Generate conformers in random orientations.
60 4614bb6a Carles

61 4614bb6a Carles
    @param mol: rdkit mol object of the molecule to be adsorbed.
62 92924d24 Carles
    @param num_confs: number of conformers to randomly generate.
63 b1f6e69d Carles
    @param local_min: bool: if generated conformers should be a local minimum.
64 4614bb6a Carles
    @return: mol: rdkit mol object containing the different conformers.
65 92924d24 Carles
             rmsd_mtx: triangular matrix with the rmsd values of conformers.
66 b1f6e69d Carles

67 92924d24 Carles
    Using the rdkit library, conformers are randomly generated. If structures 
68 92924d24 Carles
    are required to be local minima, ie. setting the 'local_min' value to 
69 92924d24 Carles
    True, a geometry optimisation using UFF is performed.
70 4614bb6a Carles
    """
71 92924d24 Carles
    logger.debug('Generating Conformers')
72 9e83b5a1 Carles
73 5c70d6c6 Carles
    conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
74 b1f6e69d Carles
    if local_min:
75 b1f6e69d Carles
        for conf in conf_ids:
76 b1f6e69d Carles
            Chem.UFFOptimizeMolecule(mol, confId=conf)
77 5c70d6c6 Carles
78 5c70d6c6 Carles
    Chem.AlignMolConformers(mol)
79 92924d24 Carles
    logger.info(f'Generated {len(mol.GetConformers())} conformers')
80 92924d24 Carles
    return mol
81 92924d24 Carles
82 62576f52 Carles
83 92924d24 Carles
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"):
84 92924d24 Carles
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
85 92924d24 Carles

86 92924d24 Carles
    @param mol: rdkit mol object containing at least two conformers.
87 92924d24 Carles
    @param remove_Hs: bool or str,
88 92924d24 Carles
    @return rmsd_matrix: Matrix containing the rmsd values of every pair of
89 92924d24 Carles
    conformers.
90 92924d24 Carles

91 92924d24 Carles
    The RMSD values of every pair of conformers is computed, stored in matrix
92 92924d24 Carles
    form and returned back. The calculation of rmsd values can take into
93 92924d24 Carles
    account all hydrogens, none, or only the ones not linked to carbon atoms.
94 92924d24 Carles
    """
95 92924d24 Carles
    if mol.GetNumConformers() < 2:
96 92924d24 Carles
        err = "The provided molecule has less than 2 conformers"
97 92924d24 Carles
        logger.error(err)
98 92924d24 Carles
        raise ValueError(err)
99 92924d24 Carles
100 92924d24 Carles
    if not remove_Hs:
101 92924d24 Carles
        pass
102 92924d24 Carles
    elif remove_Hs or remove_Hs.lower() == "all":
103 92924d24 Carles
        mol = Chem.RemoveHs(mol)
104 92924d24 Carles
    elif remove_Hs.lower() == "c":
105 92924d24 Carles
        mol = remove_C_linked_Hs(mol)
106 92924d24 Carles
    else:
107 54be2a9e Carles
        err = "remove_Hs value does not have an acceptable value"
108 54be2a9e Carles
        logger.error(err)
109 54be2a9e Carles
        raise ValueError(err)
110 92924d24 Carles
111 92924d24 Carles
    num_confs = mol.GetNumConformers()
112 92924d24 Carles
    conf_ids = list(range(num_confs))
113 92924d24 Carles
    rmsd_mtx = np.zeros((num_confs, num_confs))
114 5c70d6c6 Carles
    for conf1 in conf_ids:
115 5c70d6c6 Carles
        for conf2 in conf_ids[conf1 + 1:]:
116 92924d24 Carles
            rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1)
117 92924d24 Carles
            rmsd_mtx[conf1][conf2] = rmsd
118 92924d24 Carles
            rmsd_mtx[conf2][conf1] = rmsd
119 92924d24 Carles
120 92924d24 Carles
    return rmsd_mtx
121 4614bb6a Carles
122 92924d24 Carles
123 92924d24 Carles
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
124 92924d24 Carles
    """Computes the moments of inertia of the given conformers
125 92924d24 Carles

126 92924d24 Carles
    @param mol: rdkit mol object of the relevant molecule.
127 92924d24 Carles
    @return numpy array 2D: The inner array contains the moments of inertia for
128 92924d24 Carles
    the three principal axis of a given conformer. They are ordered by its value
129 92924d24 Carles
    in ascending order. The outer tuple loops over the conformers.
130 92924d24 Carles
    """
131 92924d24 Carles
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
132 92924d24 Carles
133 92924d24 Carles
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
134 92924d24 Carles
                     for conf in range(mol.GetNumConformers())])
135 92924d24 Carles
136 92924d24 Carles
137 62576f52 Carles
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000):
138 92924d24 Carles
    """Optimizes the geometry of the given conformers and returns the new mol
139 92924d24 Carles
    object and the energies of its conformers.
140 92924d24 Carles

141 92924d24 Carles
    @param mol: rdkit mol object of the relevant molecule.
142 62576f52 Carles
    @param max_iters: Maximum number of geometry optimization iterations. With 0
143 62576f52 Carles
    a single point energy calculation is performed and only the conformer
144 62576f52 Carles
    energies are returned.
145 92924d24 Carles
    @return mol: rdkit mol object of the optimized molecule.
146 92924d24 Carles
    @return numpy.ndarray: Array with the energies of the optimized conformers.
147 92924d24 Carles

148 92924d24 Carles
    The MMFF forcefield is used for the geometry optimization in its rdkit
149 62576f52 Carles
    implementation. With max_iters value set to 0, a single point energy
150 62576f52 Carles
    calculation is performed and only the energies are returned. For values
151 62576f52 Carles
    larger than 0, if the geometry does not converge for a certain conformer,
152 92924d24 Carles
    the latter is removed from the list of conformers and its energy is not
153 92924d24 Carles
    included in the returned list.
154 92924d24 Carles
    """
155 92924d24 Carles
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
156 92924d24 Carles
157 92924d24 Carles
    init_num_confs = mol.GetNumConformers()
158 92924d24 Carles
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
159 62576f52 Carles
                                                 maxIters=max_iters,
160 62576f52 Carles
                                                 nonBondedThresh=10))
161 62576f52 Carles
162 62576f52 Carles
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
163 62576f52 Carles
    # return all conformers if optimization is switched off, ie. maxIters = 0
164 62576f52 Carles
    if max_iters > 0:
165 62576f52 Carles
        for i, conv in enumerate(results[:, 0]):
166 62576f52 Carles
            if conv != 0:
167 62576f52 Carles
                mol.RemoveConformer(i)
168 62576f52 Carles
        for i, conf in enumerate(mol.GetConformers()):
169 62576f52 Carles
            conf.SetId(i)
170 62576f52 Carles
        if mol.GetNumConformers() < init_num_confs:
171 62576f52 Carles
            logger.warning(f'MMFF Geometry optimization did not comverge for at'
172 62576f52 Carles
                           f'least one conformer. Continuing with '
173 62576f52 Carles
                           f'{mol.GetNumConformers()} converged conformers')
174 62576f52 Carles
        logger.info(f'Optimized conformers with MMFF.')
175 62576f52 Carles
        return mol, np.array([res[1] for res in results if res[0] == 0])
176 62576f52 Carles
    else:
177 62576f52 Carles
        logger.info(f'Computed conformers energy with MMFF.')
178 62576f52 Carles
        return np.array([res[1] for res in results])
179 4614bb6a Carles
180 4614bb6a Carles
181 fd33bfdb Carles
def prep_run(inp_file, mol: Chem.rdchem.Mol, exemplars):
182 fd33bfdb Carles
    from shutil import copy
183 fd33bfdb Carles
    # Checking if 'isolated' directory already exists and if so backing it up.
184 fd33bfdb Carles
    dir_name = 'isolated'
185 fd33bfdb Carles
    bak_num = 0
186 fd33bfdb Carles
    while dir_name in os.listdir("."):
187 fd33bfdb Carles
        bak_num += 1
188 fd33bfdb Carles
        dir_name = dir_name.split(".")[0]+f".bak{bak_num}"
189 fd33bfdb Carles
    if bak_num > 0:
190 fd33bfdb Carles
        os.rename('isolated', dir_name)
191 fd33bfdb Carles
        logger.warning("'isolated' directory already present. Moved former "
192 fd33bfdb Carles
                       f"directory to {dir_name}")
193 fd33bfdb Carles
    os.mkdir('isolated')
194 fd33bfdb Carles
195 fd33bfdb Carles
    # Creating
196 fd33bfdb Carles
    for i, conf in enumerate(exemplars):
197 fd33bfdb Carles
        os.mkdir(f'isolated/conf_{i}')
198 fd33bfdb Carles
        copy(inp_file, f'isolated/conf_{i}/')
199 fd33bfdb Carles
        Chem.MolToXYZFile(mol, f'isolated/conf_{i}/coord.xyz', confId=conf)
200 fd33bfdb Carles
201 fd33bfdb Carles
202 4614bb6a Carles
def run_isolated(inp_vars):
203 4614bb6a Carles
    """Directs the execution of functions to obtain the conformers to adsorb
204 4614bb6a Carles

205 4614bb6a Carles
    @param inp_vars:
206 4614bb6a Carles
    @return:
207 4614bb6a Carles
    """
208 54be2a9e Carles
    from formats import adapt_format
209 75325cc3 Carles
    from clustering import clustering
210 54be2a9e Carles
211 34a03ee1 Carles
    # from clustering import *
212 4614bb6a Carles
    logger.info('Carrying out procedures for the isolated molecule')
213 4614bb6a Carles
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
214 92924d24 Carles
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
215 fd33bfdb Carles
    if inp_vars['min_confs']:
216 fd33bfdb Carles
        confs, confs_ener = mmff_opt_confs(confs)
217 34a03ee1 Carles
    rmsd_mtx = get_rmsd(confs)
218 fd33bfdb Carles
    exemplars = clustering(rmsd_mtx)
219 fd33bfdb Carles
    prep_run(inp_vars['isol_inp_file'], confs, exemplars)
220 fd33bfdb Carles
    # run dft
221 92924d24 Carles
222 34a03ee1 Carles
    if 'moi' in inp_vars['cluster_magns']:
223 34a03ee1 Carles
        confs_moi = get_moments_of_inertia(confs)
224 92924d24 Carles
225 fd33bfdb Carles
    # if 'energy' in inp_vars['cluster_magns']: