Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ 082685ad

Historique | Voir | Annoter | Télécharger (7,69 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
5
gen_confs: Generate a number of conformers in random orientations.
6
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
7
get_moments_of_inertia: Computes moments of inertia of the given conformers.
8
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
9
    new mol object and the energies of its conformers.
10
run_isolated: directs the execution of functions to achieve the goal
11
"""
12
import logging
13

    
14
import numpy as np
15
import rdkit.Chem.AllChem as Chem
16

    
17
from modules.calculation import check_finished_calcs
18
from modules.refinement import select_confs
19
from modules.formats import collect_coords
20

    
21
logger = logging.getLogger('DockOnSurf')
22

    
23

    
24
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
25
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
26

27
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
28
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
29
    a carbon atom.
30

31
    The functions removes the hydrogen atoms bonded to carbon atoms while
32
    keeping the ones bonded to other atoms or non-bonded at all.
33
    """
34

    
35
    mol = Chem.RWMol(mol)
36
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
37

    
38
    for atm_idx in rev_atm_idxs:
39
        atom = mol.GetAtomWithIdx(atm_idx)
40
        if atom.GetAtomicNum() != 1:
41
            continue
42
        for neigh in atom.GetNeighbors():
43
            if neigh.GetAtomicNum() == 6:
44
                mol.RemoveAtom(atom.GetIdx())
45
    return mol
46

    
47

    
48
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int):
49
    """Generate conformers in random orientations.
50

51
    @param mol: rdkit mol object of the molecule to be adsorbed.
52
    @param num_confs: number of conformers to randomly generate.
53
    @return: mol: rdkit mol object containing the different conformers.
54
             rmsd_mtx: Matrix with the rmsd values of conformers.
55

56
    Using the rdkit library, conformers are randomly generated. If structures 
57
    are required to be local minima, ie. setting the 'local_min' value to 
58
    True, a geometry optimisation using UFF is performed.
59
    """
60
    logger.debug('Generating Conformers.')
61

    
62
    mol = Chem.AddHs(mol)
63
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0,
64
                            randomSeed=5817216)
65
    Chem.AlignMolConformers(mol)
66
    logger.info(f'Generated {len(mol.GetConformers())} conformers.')
67
    return mol
68

    
69

    
70
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
71
    """Computes the moments of inertia of the given conformers
72

73
    @param mol: rdkit mol object of the relevant molecule.
74
    @return numpy array 2D: The inner array contains the moments of inertia for
75
    the three principal axis of a given conformer. They are ordered by its value
76
    in ascending order. The outer tuple loops over the conformers.
77
    """
78
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
79

    
80
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
81
                     for conf in range(mol.GetNumConformers())])
82

    
83

    
84
def pre_opt_confs(mol: Chem.rdchem.Mol, force_field='mmff', max_iters=2000):
85
    """Optimizes the geometry of the given conformers and returns the new mol
86
    object and the energies of its conformers.
87

88
    @param mol: rdkit mol object of the relevant molecule.
89
    @param force_field: Force Field to use for the pre-optimization.
90
    @param max_iters: Maximum number of geometry optimization iterations. With 0
91
    a single point energy calculation is performed and only the conformer
92
    energies are returned.
93
    @return mol: rdkit mol object of the optimized molecule.
94
    @return numpy.ndarray: Array with the energies of the optimized conformers.
95

96
    The MMFF and UFF force fields can be used for the geometry optimization in
97
    their rdkit implementation. With max_iters value set to 0, a single point
98
    energy calculation is performed and only the energies are returned. For
99
    values larger than 0, if the geometry does not converge for a certain
100
    conformer, the latter is removed from the list of conformers and its energy
101
    is not included in the returned list.
102
    """
103
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs, \
104
        UFFOptimizeMoleculeConfs
105

    
106
    init_num_confs = mol.GetNumConformers()
107
    if force_field == 'mmff':
108
        results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
109
                                                     maxIters=max_iters,
110
                                                     nonBondedThresh=10))
111
    elif force_field == 'uff':
112
        results = np.array(UFFOptimizeMoleculeConfs(mol, numThreads=0,
113
                                                    maxIters=max_iters,
114
                                                    vdwThresh=10))
115
    else:
116
        logger.error("force_field parameter must be 'MMFF' or 'UFF'")
117
        raise ValueError("force_field parameter must be 'MMFF' or 'UFF'")
118

    
119
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
120
    # return all conformers if optimization is switched off, ie. maxIters = 0
121
    if max_iters > 0:
122
        for i, conv in enumerate(results[:, 0]):
123
            if conv != 0:
124
                mol.RemoveConformer(i)
125
        for i, conf in enumerate(mol.GetConformers()):
126
            conf.SetId(i)
127
        if mol.GetNumConformers() < init_num_confs:
128
            logger.warning(f'Geometry optimization did not comverge for at'
129
                           f'least one conformer. Continuing with '
130
                           f'{mol.GetNumConformers()} converged conformers.')
131
        logger.info(f'Pre-optimized conformers with {force_field}.')
132
        return mol, np.array([res[1] for res in results if res[0] == 0])
133
    else:
134
        logger.info(f'Computed conformers energy with {force_field}.')
135
        return np.array([res[1] for res in results])
136

    
137

    
138
def run_isolated(inp_vars):
139
    """Directs the execution of functions to obtain the conformers to adsorb
140

141
    @param inp_vars: Calculation parameters from input file.
142
    @return:
143
    """
144
    from modules.formats import adapt_format, confs_to_mol_list, \
145
        rdkit_mol_to_ase_atoms
146
    from modules.clustering import clustering, get_rmsd
147
    from modules.calculation import run_calc
148

    
149
    logger.info('Carrying out procedures for the isolated molecule.')
150
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'],
151
                          inp_vars['special_atoms'])
152
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
153
    if inp_vars['pre_opt']:
154
        confs, confs_ener = pre_opt_confs(confs, inp_vars['pre_opt'])
155
    else:
156
        confs_ener = pre_opt_confs(confs, max_iters=0)
157
    conf_list = confs_to_mol_list(confs)
158
    rmsd_mtx = get_rmsd(conf_list)
159
    confs_moi = get_moments_of_inertia(confs)
160
    exemplars = clustering(rmsd_mtx)
161
    mol_list = confs_to_mol_list(confs, exemplars)
162
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
163
    if len(ase_atms_list) == 0:
164
        err_msg = "No configurations were generated: Check the parameters in" \
165
                  "dockonsurf.inp"
166
        logger.error(err_msg)
167
        raise ValueError(err_msg)
168
    run_calc('isolated', inp_vars, ase_atms_list)
169
    finished_calcs, unfinished_calcs = check_finished_calcs('isolated',
170
                                                            inp_vars['code'])
171
    conf_list = collect_coords(finished_calcs, inp_vars['code'], 'isolated',
172
                               inp_vars['special_atoms'])
173
    most_stable_conf = select_confs(conf_list, finished_calcs, 0,
174
                                    inp_vars['code'])[0]
175
    logger.info('Finished the procedures for the isolated molecule section. '
176
                f'Most stable conformers is {most_stable_conf}.')