Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ 365d5b9a

Historique | Voir | Annoter | Télécharger (9,78 ko)

1
"""Module for the interconversion between different kinds of atomic data.
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
5
    of separate rdkit.Mol objects.
6
rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
7
add_special_atoms: Allows ase to use custom elements with symbols not in the
8
    periodic table.
9
adapt_format: Converts the coordinate files into a required library object type.
10
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11
    ase.Atoms object.
12
collect_coords: Directs the reading of coordinates on a set of subdirectories.
13
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
14
collect_energies: Directs the reading of energies on a set of subdirectories.
15
"""
16

    
17
import logging
18

    
19
import rdkit.Chem.AllChem as Chem
20

    
21
logger = logging.getLogger('DockOnSurf')
22

    
23

    
24
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
25
    """Converts the conformers inside a rdkit mol object to a list of
26
    separate mol objects.
27

28
    @param mol: rdkit mol object containing at least one conformer.
29
    @param idx_lst: list of conformer indices to be considered. If not passed,
30
        all conformers are considered.
31
    @return: list of separate mol objects.
32
    """
33
    if idx_lst is None:
34
        idx_lst = list(range(mol.GetNumConformers()))
35
    return [Chem.MolFromMolBlock(
36
        Chem.MolToMolBlock(mol, confId=int(idx)).replace("3D", ""),
37
        removeHs=False) for idx in idx_lst]
38

    
39

    
40
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
41
    """Converts a rdkit mol object into ase Atoms object.
42
    @param mol: rdkit mol object containing only one conformer.
43
    @return ase.Atoms: ase Atoms object with the same coordinates.
44
    """
45
    from ase import Atoms
46
    if mol.GetNumConformers() > 1:
47
        logger.warning('A mol object with multiple conformers is parsed, '
48
                       'converting to Atoms only the first conformer.')
49
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
50
    positions = mol.GetConformer(0).GetPositions()
51
    return Atoms(symbols=symbols, positions=positions)
52

    
53

    
54
def add_special_atoms(symbol_pairs):
55
    """Allows ase to use custom elements with symbols not in the periodic table.
56

57
    This function adds new chemical elements to be used by ase. Every new custom
58
    element must have a traditional (present in the periodic table) partner
59
    from which to obtain all its properties.
60
    @param symbol_pairs: List of tuples containing pairs of chemical symbols.
61
        Every tuple contains a pair of chemical symbols, the first label must be
62
        the label of the custom element and the second one the symbol of the
63
        reference one (traditional present on the periodic table).
64
    @return:
65
    """  # TODO Enable special atoms for rdkit
66
    import numpy as np
67
    from ase import data
68
    for i, pair in enumerate(symbol_pairs):
69
        data.chemical_symbols += [pair[0]]
70
        z_orig = data.atomic_numbers[pair[1]]
71
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
72
        orig_com_mass = data.atomic_masses_common[z_orig]
73
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
74
        data.atomic_names += [pair[0]]
75
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
76
                                                 orig_iupac_mass)
77
        data.atomic_masses = data.atomic_masses_iupac2016
78
        data.atomic_masses_common = np.append(data.atomic_masses_common,
79
                                              orig_com_mass)
80
        data.covalent_radii = np.append(data.covalent_radii,
81
                                        data.covalent_radii[z_orig])
82
        data.reference_states += [data.reference_states[z_orig]]
83
        # TODO Add vdw_radii, gsmm and aml (smaller length)
84

    
85

    
86
def adapt_format(requirement, coord_file, spec_atms=tuple()):
87
    """Converts the coordinate files into a required library object type.
88

89
    Depending on the library required to use and the file type, it converts the
90
    coordinate file into a library-workable object.
91
    @param requirement: str, the library for which the conversion should be
92
    made. Accepted values: 'ase', 'rdkit'.
93
    @param coord_file: str, path to the coordinates file aiming to convert.
94
    Accepted file tyoes: 'xyz', 'mol'.
95
    @param spec_atms: List of tuples containing pairs of new/traditional
96
        chemical symbols.
97
    @return: an object the required library can work with.
98
    """  # TODO POSCAR/CONTCAR files
99
    import ase.io
100
    from ase.io.formats import filetype
101

    
102
    req_vals = ['rdkit', 'ase']
103
    file_type_vals = ['xyz', 'mol']
104
    lib_err = f"The conversion to the '{requirement}' library object type" \
105
              f" has not yet been implemented"
106
    conv_info = f"Converted {coord_file} to {requirement} object type"
107

    
108
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
109

    
110
    if requirement not in req_vals:
111
        logger.error(lib_err)
112
        raise NotImplementedError(lib_err)
113

    
114
    if filetype(coord_file) not in file_type_vals:
115
        logger.error(fil_type_err)
116
        raise NotImplementedError(fil_type_err)
117

    
118
    if requirement == 'rdkit':
119
        if filetype(coord_file) == 'xyz':
120
            from modules.xyz2mol import xyz2mol
121
            ase_atms = ase.io.read(coord_file)
122
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
123
            xyz_coordinates = ase_atms.positions.tolist()
124
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
125
            logger.debug(conv_info)
126
            return Chem.AddHs(rd_mol_obj)
127
        elif filetype(coord_file) == 'mol':
128
            logger.debug(conv_info)
129
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
130

    
131
    if requirement == 'ase':
132
        add_special_atoms(spec_atms)
133
        if filetype(coord_file) == 'xyz':
134
            logger.debug(conv_info)
135
            return ase.io.read(coord_file)
136
        elif filetype(coord_file) == 'mol':
137
            logger.debug(conv_info)
138
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
139
            return rdkit_mol_to_ase_atoms(rd_mol)
140

    
141

    
142
def read_coords_cp2k(file, spec_atoms=tuple()):
143
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
144
     object.
145

146
    @param file: The file to read containing the coordinates.
147
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
148
    @return: ase.Atoms object of the coordinates in the file.
149
    """
150
    import numpy as np
151
    from ase import Atoms
152
    from pycp2k import CP2K
153

    
154
    cp2k = CP2K()
155
    cp2k.parse(file)
156
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
157
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
158
    symbols = [atom.split()[0] for atom in raw_coords]
159
    positions = np.array([atom.split()[1:] for atom in raw_coords])
160
    if len(spec_atoms) > 0:
161
        add_special_atoms(spec_atoms)  # TODO check usage
162
    return Atoms(symbols=symbols, positions=positions)
163

    
164

    
165
def collect_coords(conf_list, code, run_type, spec_atms=tuple()):
166
    """Directs the reading of coordinates on a set of subdirectories.
167

168
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
169
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
170
    calculations produced by a given code, stored in every conf_X subdirectory,
171
    it collects the coordinates of the specified conf_X subdirectories in a
172
    single run_type by calling the adequate function (depending on the code) and
173
    returns a list of ase.Atoms objects.
174

175
    @param conf_list: List of directories where to read the coords from.
176
    @param code: the code that produced the calculation results files.
177
    @param run_type: the type of calculation (and also the name of the folder)
178
                     containing the calculation subdirectories.
179
    @param spec_atms: List of tuples containing pairs of new/traditional
180
        chemical symbols.
181
    @return: list of ase.Atoms objects.
182
    """
183
    from glob import glob
184
    atoms_list = []
185
    for conf in conf_list:
186
        if code == 'cp2k':
187
            atoms_list.append(read_coords_cp2k(glob(f"{run_type}/{conf}/*-1"
188
                                                    f".restart")[0], spec_atms))
189
        # elif code == 'vasp'
190
    return atoms_list
191

    
192

    
193
def read_energy_cp2k(file):
194
    """Reads the CP2K out file and returns its final energy.
195

196
    @param file: The file from which the energy should be read.
197
    @return: The last energy on the out file.
198
    """
199
    out_fh = open(file, 'r')
200
    energy = None
201
    for line in out_fh:
202
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
203
            energy = float(line.strip().split(':')[1]) * 27.2113845  # Ha to eV
204
    out_fh.close()
205
    return energy
206

    
207

    
208
def collect_energies(conf_list, code, run_type):
209
    """Directs the reading of energies on a set of subdirectories.
210

211
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
212
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
213
    calculations produced by a given code, stored in every conf_X subdirectory,
214
    it collects the energies of the specified conf_X subdirectories in a
215
    single run_type by calling the adequate function (depending on the code) and
216
    returns a list of energies.
217

218
    @param conf_list: List of directories where to read the energy.
219
    @param code: The code that produced the calculation output files.
220
    @param run_type: The type of calculation (and also the name of the folder)
221
                     containing the calculation subdirectories.
222
    @return: List of energies
223
    """
224
    from glob import glob
225
    import numpy as np
226

    
227
    energies = []
228
    for conf in conf_list:
229
        if code == 'cp2k':
230
            energies.append(read_energy_cp2k(
231
                glob(f"{run_type}/{conf}/*.out")[0]))
232

    
233
    if len(energies) == 0:
234
        err = f"No results found on {run_type}"
235
        logger.error(err)
236
        raise FileNotFoundError(err)
237

    
238
    return np.array(energies)