Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ e7d9c7e8

Historique | Voir | Annoter | Télécharger (9,09 ko)

1
"""Module for the conversion between atomic coordinates files and objects
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of separate mol objects.
6
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7
adapt_format: Converts the coordinate files into a required library object type.
8
read_coords: Reads the atomic coordinates resulting from finished calculations.
9
"""
10

    
11
import logging
12

    
13
import rdkit.Chem.AllChem as Chem
14

    
15
logger = logging.getLogger('DockOnSurf')
16

    
17

    
18
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
19
    """Converts the conformers inside a rdkit mol object to a list of
20
    separate mol objects.
21

22
    @param mol: rdkit mol object containing at least one conformer.
23
    @param idx_lst: list of conformer indices to be considered. If not passed,
24
        all conformers are considered.
25
    @return: list of separate mol objects.
26
    """
27
    if idx_lst is None:
28
        idx_lst = list(range(mol.GetNumConformers()))
29
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
30
                                 removeHs=False) for idx in idx_lst]
31

    
32

    
33
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
34
    """Converts a rdkit mol object into ase Atoms object.
35
    @param mol: rdkit mol object containing only one conformer.
36
    @return ase.Atoms: ase Atoms object with the same coordinates.
37
    """
38
    from ase import Atoms
39
    if mol.GetNumConformers() > 1:
40
        logger.warning('A mol object with multiple conformers is parsed, '
41
                       'converting to Atoms only the first conformer.')
42
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
43
    positions = mol.GetConformer(0).GetPositions()
44
    return Atoms(symbols=symbols, positions=positions)
45

    
46

    
47
def add_special_atoms(symbol_pairs):
48
    """Allows to use custom elements with symbols not in the periodic table.
49

50
    This function adds new chemical elements to be used by ase. Every new custom
51
    element must have a traditional (present in the periodic table) partner
52
    from which to obtain all its properties.
53
    @param symbol_pairs: List of tuples containing the pairs of chemical symbols.
54
        Every tuple contains a pair of chemical symbols, the first label must be
55
        the label of the custom element and the second one the symbol of the
56
        reference one (traditional present on the periodic table).
57
    @return:
58
    """
59
    import numpy as np
60
    from ase import data
61
    for i, pair in enumerate(symbol_pairs):
62
        data.chemical_symbols += [pair[0]]
63
        z_orig = data.atomic_numbers[pair[1]]
64
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
65
        orig_com_mass = data.atomic_masses_common[z_orig]
66
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
67
        data.atomic_names += [pair[0]]
68
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
69
                                                 orig_iupac_mass)
70
        data.atomic_masses = data.atomic_masses_iupac2016
71
        data.atomic_masses_common = np.append(data.atomic_masses_common,
72
                                              orig_com_mass)
73
        data.covalent_radii = np.append(data.covalent_radii,
74
                                        data.covalent_radii[z_orig])
75
        data.reference_states += [data.reference_states[z_orig]]
76
        # TODO Add vdw_radii, gsmm and aml (smaller length)
77

    
78

    
79
def adapt_format(requirement, coord_file, spec_atms=tuple()):
80
    """Converts the coordinate files into a required library object type.
81

82
    Depending on the library required to use and the file type, it converts the
83
    coordinate file into a library-workable object.
84
    @param requirement: str, the library for which the conversion should be
85
    made. Accepted values: 'ase', 'rdkit'.
86
    @param coord_file: str, path to the coordinates file aiming to convert.
87
    Accepted file tyoes: 'xyz', 'mol'.
88
    @param spec_atms: List of tuples containing pairs of new/traditional
89
        chemical symbols.
90
    @return: an object the required library can work with.
91
    """
92
    import ase.io
93
    from ase.io.formats import filetype
94

    
95
    req_vals = ['rdkit', 'ase']
96
    file_type_vals = ['xyz', 'mol']
97
    lib_err = f"The conversion to the '{requirement}' library object type" \
98
              f" has not yet been implemented"
99
    conv_info = f"Converted {coord_file} to {requirement} object type"
100

    
101
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
102

    
103
    if requirement not in req_vals:
104
        logger.error(lib_err)
105
        raise NotImplementedError(lib_err)
106

    
107
    if filetype(coord_file) not in file_type_vals:
108
        logger.error(fil_type_err)
109
        raise NotImplementedError(fil_type_err)
110

    
111
    if requirement == 'rdkit':
112
        if filetype(coord_file) == 'xyz':
113
            from modules.xyz2mol import xyz2mol
114
            ase_atms = ase.io.read(coord_file)
115
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
116
            xyz_coordinates = ase_atms.positions.tolist()
117
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
118
            logger.debug(conv_info)
119
            return Chem.AddHs(rd_mol_obj)
120
        elif filetype(coord_file) == 'mol':
121
            logger.debug(conv_info)
122
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
123

    
124
    if requirement == 'ase':
125
        add_special_atoms(spec_atms)
126
        if filetype(coord_file) == 'xyz':
127
            logger.debug(conv_info)
128
            return ase.io.read(coord_file)
129
        elif filetype(coord_file) == 'mol':
130
            logger.debug(conv_info)
131
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
132
            return rdkit_mol_to_ase_atoms(rd_mol)
133

    
134

    
135
def read_coords_cp2k(file, spec_atoms=tuple()):
136
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
137
     object.
138

139
    @param file: The file to read containing the coordinates.
140
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
141
    @return: ase.Atoms object of the coordinates in the file.
142
    """
143
    import numpy as np
144
    from ase import Atoms
145
    from pycp2k import CP2K
146

    
147
    cp2k = CP2K()
148
    cp2k.parse(file)
149
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
150
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
151
    symbols = [atom.split()[0] for atom in raw_coords]
152
    positions = np.array([atom.split()[1:] for atom in raw_coords])
153
    if len(spec_atoms) > 0:
154
        add_special_atoms(spec_atoms)  # TODO check usage
155
    return Atoms(symbols=symbols, positions=positions)
156

    
157

    
158
def read_coords(conf_list, code, run_type, spec_atms=tuple()):
159
    """Reads the atomic coordinates resulting from finished calculations.
160

161
    Given a run_type ('isolated', 'screening' or 'refinement') directory
162
    containing different subdirectories with finished calculations in every
163
    subdirectory, it reads, from each subirectory, the final coordinates
164
    resulting from the calculation and returns a list of objects adequate to the
165
    required library.
166

167
    @param conf_list: List of directories where to read the coords.
168
    @param code: the code that produced the calculation results files.
169
    @param run_type: the type of calculation (and also the name of the folder)
170
                     containing the calculation subdirectories.
171
    @param req: The required library object type to make the list of (eg. rdkit,
172
                ase)
173
    @param spec_atms: List of tuples containing pairs of new/traditional
174
        chemical symbols.
175
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
176
    """
177
    from glob import glob
178
    atoms_list = []
179
    for conf in conf_list:
180
        if code == 'cp2k':
181
            read_coords_cp2k(glob(f"{run_type}/{conf}/*-1.restart")[0],
182
                             spec_atms)
183

    
184
    return atoms_list
185

    
186

    
187
def read_energy_cp2k(file):
188
    """Reads the energies of a CP2K out file and returns its final energy.
189

190
    @param file: The file from which the energy should be read.
191
    @return: The last energy on the out file.
192
    """
193
    out_fh = open(file, 'r')
194
    energy = None
195
    for line in out_fh:
196
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
197
            energy = float(line.strip().split(':')[1])
198
    out_fh.close()
199
    return energy
200

    
201

    
202
def read_energies(conf_list, code, run_type):
203
    """Reads the energies resulting from finished calculations.
204

205
    Given a run_type ('isolated', 'screening' or 'refinement') directory
206
    containing different subdirectories with finished calculations in every
207
    subdirectory, it reads the final energies of calculations inside each
208
    subdirectory.
209

210
    @param conf_list: List of directories where to read the energy.
211
    @param code: the code that produced the calculation results files.
212
    @param run_type: the type of calculation (and also the name of the folder)
213
                     containing the calculation subdirectories.
214
    @return: list of energies
215
    """
216
    import numpy as np
217

    
218
    energies = []
219
    for conf in conf_list:
220
        if code == 'cp2k':
221
            energies.append(read_energy_cp2k(conf))
222

    
223
    if len(energies) == 0:
224
        err = f"No results found on {run_type}"
225
        logger.error(err)
226
        raise FileNotFoundError(err)
227

    
228
    return np.array(energies)