Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ cf980c86

Historique | Voir | Annoter | Télécharger (7,96 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
5
gen_confs: Generate a number of conformers in random orientations.
6
get_moments_of_inertia: Computes moments of inertia of the given conformers.
7
get_moments_of_inertia: Computes the moments of inertia of the given conformers.
8
pre_opt_confs: Optimizes the geometry of the given conformers and returns the
9
    new mol object and the energies of its conformers.
10
run_isolated: directs the execution of functions to achieve the goal
11
"""
12
import logging
13

    
14
import numpy as np
15
import rdkit.Chem.AllChem as Chem
16

    
17
logger = logging.getLogger('DockOnSurf')
18

    
19

    
20
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
21
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
22

23
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
24
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
25
    a carbon atom.
26

27
    The functions removes the hydrogen atoms bonded to carbon atoms while
28
    keeping the ones bonded to other atoms or non-bonded at all.
29
    """
30

    
31
    mol = Chem.RWMol(mol)
32
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
33

    
34
    for atm_idx in rev_atm_idxs:
35
        atom = mol.GetAtomWithIdx(atm_idx)
36
        if atom.GetAtomicNum() != 1:
37
            continue
38
        for neigh in atom.GetNeighbors():
39
            if neigh.GetAtomicNum() == 6:
40
                mol.RemoveAtom(atom.GetIdx())
41
    return mol
42

    
43

    
44
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int):
45
    """Generate conformers in random orientations.
46

47
    @param mol: rdkit mol object of the molecule to be adsorbed.
48
    @param num_confs: number of conformers to randomly generate.
49
    @return: mol: rdkit mol object containing the different conformers.
50
             rmsd_mtx: Matrix with the rmsd values of conformers.
51

52
    Using the rdkit library, conformers are randomly generated. If structures 
53
    are required to be local minima, ie. setting the 'local_min' value to 
54
    True, a geometry optimisation using UFF is performed.
55
    """
56
    logger.debug('Generating Conformers.')
57

    
58
    mol = Chem.AddHs(mol)
59
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0,
60
                            randomSeed=5817216)
61
    Chem.AlignMolConformers(mol)
62
    logger.info(f'Generated {len(mol.GetConformers())} conformers.')
63
    return mol
64

    
65

    
66
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
67
    """Computes the moments of inertia of the given conformers.
68

69
    @param mol: rdkit mol object of the relevant molecule.
70
    @return numpy array 2D: The inner array contains the moments of inertia for
71
    the three principal axis of a given conformer. They are ordered by its value
72
    in ascending order. The outer tuple loops over the conformers.
73
    """
74
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
75

    
76
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
77
                     for conf in range(mol.GetNumConformers())])
78

    
79

    
80
def pre_opt_confs(mol: Chem.rdchem.Mol, force_field='mmff', max_iters=2000):
81
    """Optimizes the geometry of the given conformers and returns the new mol
82
    object and the energies of its conformers.
83

84
    @param mol: rdkit mol object of the relevant molecule.
85
    @param force_field: Force Field to use for the pre-optimization.
86
    @param max_iters: Maximum number of geometry optimization iterations. With 0
87
    a single point energy calculation is performed and only the conformer
88
    energies are returned.
89
    @return mol: rdkit mol object of the optimized molecule.
90
    @return numpy.ndarray: Array with the energies of the optimized conformers.
91

92
    The MMFF and UFF force fields can be used for the geometry optimization in
93
    their rdkit implementation. With max_iters value set to 0, a single point
94
    energy calculation is performed and only the energies are returned. For
95
    values larger than 0, if the geometry does not converge for a certain
96
    conformer, the latter is removed from the list of conformers and its energy
97
    is not included in the returned list.
98
    """
99
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs, \
100
        UFFOptimizeMoleculeConfs
101

    
102
    init_num_confs = mol.GetNumConformers()
103
    if force_field == 'mmff':
104
        results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
105
                                                     maxIters=max_iters,
106
                                                     nonBondedThresh=10))
107
    elif force_field == 'uff':
108
        results = np.array(UFFOptimizeMoleculeConfs(mol, numThreads=0,
109
                                                    maxIters=max_iters,
110
                                                    vdwThresh=10))
111
    else:
112
        logger.error("force_field parameter must be 'MMFF' or 'UFF'")
113
        raise ValueError("force_field parameter must be 'MMFF' or 'UFF'")
114

    
115
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
116
    # return all conformers if optimization is switched off, ie. maxIters = 0
117
    if max_iters > 0:
118
        for i, conv in enumerate(results[:, 0]):
119
            if conv != 0:
120
                mol.RemoveConformer(i)
121
        for i, conf in enumerate(mol.GetConformers()):
122
            conf.SetId(i)
123
        if mol.GetNumConformers() < init_num_confs:
124
            logger.warning(f'Geometry optimization did not comverge for at'
125
                           f'least one conformer. Continuing with '
126
                           f'{mol.GetNumConformers()} converged conformers.')
127
        logger.info(f'Pre-optimized conformers with {force_field}.')
128
        return mol, np.array([res[1] for res in results if res[0] == 0])
129
    else:
130
        logger.info(f'Computed conformers energy with {force_field}.')
131
        return np.array([res[1] for res in results])
132

    
133

    
134
def run_isolated(inp_vars):
135
    """Directs the execution of functions to obtain the conformers to adsorb
136

137
    @param inp_vars: Calculation parameters from input file.
138
    @return:
139
    """
140
    from modules.formats import adapt_format, confs_to_mol_list, \
141
        rdkit_mol_to_ase_atoms, collect_confs
142
    from modules.clustering import clustering, get_rmsd
143
    from modules.calculation import run_calc, check_finished_calcs
144
    from modules.refinement import select_stable_confs
145

    
146
    logger.info('Carrying out procedures for the isolated molecule.')
147
    # Read the molecule
148
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'],
149
                          inp_vars['special_atoms'])
150
    # Generate conformers
151
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
152
    # Pre-optimizes conformers
153
    if inp_vars['pre_opt']:
154
        confs, confs_ener = pre_opt_confs(confs, inp_vars['pre_opt'])
155
    else:
156
        confs_ener = pre_opt_confs(confs, max_iters=0)
157
    conf_list = confs_to_mol_list(confs)
158
    # Calculates RMSD matrix of the conformers
159
    rmsd_mtx = get_rmsd(conf_list)
160
    confs_moi = get_moments_of_inertia(confs)
161
    # Clusters the conformers and selects a representative
162
    exemplars = clustering(rmsd_mtx)
163
    mol_list = confs_to_mol_list(confs, exemplars)
164
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
165
    if len(ase_atms_list) == 0:
166
        err_msg = "No configurations were generated: Check the parameters in" \
167
                  "dockonsurf.inp"
168
        logger.error(err_msg)
169
        raise ValueError(err_msg)
170
    # Runs the jobs.
171
    run_calc('isolated', inp_vars, ase_atms_list)
172
    logger.info("Finished the procedures for the isolated molecule section. ")
173
    if inp_vars["batch_q_sys"]:
174
        finished_calcs, failed_calcs = check_finished_calcs('isolated',
175
                                                            inp_vars['code'])
176
        conf_list = collect_confs(finished_calcs, inp_vars['code'], 'isolated',
177
                              inp_vars['special_atoms'])
178
        most_stable_conf = select_stable_confs(conf_list, 0)[0]
179

    
180
        logger.info(f"Most stable conformers is {most_stable_conf.info['iso']},"
181
                    f" with a total energy of {most_stable_conf.info['energy']}"
182
                    f" eV.")