Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ 748a6036

Historique | Voir | Annoter | Télécharger (10,26 ko)

1 fd2384fc Carles Marti
"""Module for the interconversion between different kinds of atomic data.
2 e23f119b Carles

3 e23f119b Carles
functions:
4 fd2384fc Carles Marti
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
5 fd2384fc Carles Marti
    of separate rdkit.Mol objects.
6 fd2384fc Carles Marti
rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
7 fd2384fc Carles Marti
add_special_atoms: Allows ase to use custom elements with symbols not in the
8 fd2384fc Carles Marti
    periodic table.
9 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
10 fd2384fc Carles Marti
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11 fd2384fc Carles Marti
    ase.Atoms object.
12 fd2384fc Carles Marti
collect_coords: Directs the reading of coordinates on a set of subdirectories.
13 fd2384fc Carles Marti
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
14 fd2384fc Carles Marti
collect_energies: Directs the reading of energies on a set of subdirectories.
15 e23f119b Carles
"""
16 e23f119b Carles
17 e23f119b Carles
import logging
18 f3004731 Carles
19 f3004731 Carles
import rdkit.Chem.AllChem as Chem
20 f3004731 Carles
21 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
22 e23f119b Carles
23 e23f119b Carles
24 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
25 f3004731 Carles
    """Converts the conformers inside a rdkit mol object to a list of
26 f3004731 Carles
    separate mol objects.
27 f3004731 Carles

28 f3004731 Carles
    @param mol: rdkit mol object containing at least one conformer.
29 9510666f Carles
    @param idx_lst: list of conformer indices to be considered. If not passed,
30 9510666f Carles
        all conformers are considered.
31 f3004731 Carles
    @return: list of separate mol objects.
32 f3004731 Carles
    """
33 f3004731 Carles
    if idx_lst is None:
34 f3004731 Carles
        idx_lst = list(range(mol.GetNumConformers()))
35 f02e613b Carles Marti
    return [Chem.MolFromMolBlock(
36 f02e613b Carles Marti
        Chem.MolToMolBlock(mol, confId=int(idx)).replace("3D", ""),
37 f02e613b Carles Marti
        removeHs=False) for idx in idx_lst]
38 f3004731 Carles
39 f3004731 Carles
40 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
41 f3004731 Carles
    """Converts a rdkit mol object into ase Atoms object.
42 f3004731 Carles
    @param mol: rdkit mol object containing only one conformer.
43 f3004731 Carles
    @return ase.Atoms: ase Atoms object with the same coordinates.
44 f3004731 Carles
    """
45 f3004731 Carles
    from ase import Atoms
46 4933cb8a Carles Martí
    if mol.GetNumConformers() > 1:
47 9510666f Carles
        logger.warning('A mol object with multiple conformers is parsed, '
48 695dcff8 Carles Marti
                       'converting to Atoms only the first conformer.')
49 f3004731 Carles
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
50 f3004731 Carles
    positions = mol.GetConformer(0).GetPositions()
51 f3004731 Carles
    return Atoms(symbols=symbols, positions=positions)
52 f3004731 Carles
53 f3004731 Carles
54 cc92f6ee Carles Marti
def add_special_atoms(symbol_pairs):
55 fd2384fc Carles Marti
    """Allows ase to use custom elements with symbols not in the periodic table.
56 cc92f6ee Carles Marti

57 cc92f6ee Carles Marti
    This function adds new chemical elements to be used by ase. Every new custom
58 cc92f6ee Carles Marti
    element must have a traditional (present in the periodic table) partner
59 cc92f6ee Carles Marti
    from which to obtain all its properties.
60 fd2384fc Carles Marti
    @param symbol_pairs: List of tuples containing pairs of chemical symbols.
61 cc92f6ee Carles Marti
        Every tuple contains a pair of chemical symbols, the first label must be
62 cc92f6ee Carles Marti
        the label of the custom element and the second one the symbol of the
63 cc92f6ee Carles Marti
        reference one (traditional present on the periodic table).
64 cc92f6ee Carles Marti
    @return:
65 587dca22 Carles Marti
    """  # TODO Enable special atoms for rdkit
66 cc92f6ee Carles Marti
    import numpy as np
67 cc92f6ee Carles Marti
    from ase import data
68 cc92f6ee Carles Marti
    for i, pair in enumerate(symbol_pairs):
69 cc92f6ee Carles Marti
        data.chemical_symbols += [pair[0]]
70 cc92f6ee Carles Marti
        z_orig = data.atomic_numbers[pair[1]]
71 cc92f6ee Carles Marti
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
72 cc92f6ee Carles Marti
        orig_com_mass = data.atomic_masses_common[z_orig]
73 cc92f6ee Carles Marti
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
74 cc92f6ee Carles Marti
        data.atomic_names += [pair[0]]
75 cc92f6ee Carles Marti
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
76 cc92f6ee Carles Marti
                                                 orig_iupac_mass)
77 cc92f6ee Carles Marti
        data.atomic_masses = data.atomic_masses_iupac2016
78 cc92f6ee Carles Marti
        data.atomic_masses_common = np.append(data.atomic_masses_common,
79 cc92f6ee Carles Marti
                                              orig_com_mass)
80 cc92f6ee Carles Marti
        data.covalent_radii = np.append(data.covalent_radii,
81 cc92f6ee Carles Marti
                                        data.covalent_radii[z_orig])
82 cc92f6ee Carles Marti
        data.reference_states += [data.reference_states[z_orig]]
83 90819cc3 Carles Marti
        # TODO Add vdw_radii, gsmm and aml (smaller length)
84 cc92f6ee Carles Marti
85 cc92f6ee Carles Marti
86 90819cc3 Carles Marti
def adapt_format(requirement, coord_file, spec_atms=tuple()):
87 e23f119b Carles
    """Converts the coordinate files into a required library object type.
88 e23f119b Carles

89 e23f119b Carles
    Depending on the library required to use and the file type, it converts the
90 e23f119b Carles
    coordinate file into a library-workable object.
91 e23f119b Carles
    @param requirement: str, the library for which the conversion should be
92 e23f119b Carles
    made. Accepted values: 'ase', 'rdkit'.
93 e23f119b Carles
    @param coord_file: str, path to the coordinates file aiming to convert.
94 e23f119b Carles
    Accepted file tyoes: 'xyz', 'mol'.
95 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
96 90819cc3 Carles Marti
        chemical symbols.
97 e23f119b Carles
    @return: an object the required library can work with.
98 748a6036 Carles Martí
    """
99 439ce5f7 Carles
    import ase.io
100 8ab593ee Carles
    from ase.io.formats import filetype
101 8ab593ee Carles
102 748a6036 Carles Martí
    from modules.utilities import try_command
103 748a6036 Carles Martí
104 8ab593ee Carles
    req_vals = ['rdkit', 'ase']
105 4381145e Carles
    lib_err = f"The conversion to the '{requirement}' library object type" \
106 4381145e Carles
              f" has not yet been implemented"
107 4381145e Carles
    conv_info = f"Converted {coord_file} to {requirement} object type"
108 4381145e Carles
109 748a6036 Carles Martí
    fil_type_err = f'The {filetype(coord_file)} file format is not supported'
110 4381145e Carles
111 4381145e Carles
    if requirement not in req_vals:
112 9f7bb440 Carles
        logger.error(lib_err)
113 4381145e Carles
        raise NotImplementedError(lib_err)
114 4381145e Carles
115 8ab593ee Carles
    if requirement == 'rdkit':
116 748a6036 Carles Martí
        from modules.xyz2mol import xyz2mol
117 748a6036 Carles Martí
        if filetype(coord_file) == 'xyz':  # TODO Include detection of charges.
118 439ce5f7 Carles
            ase_atms = ase.io.read(coord_file)
119 439ce5f7 Carles
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
120 439ce5f7 Carles
            xyz_coordinates = ase_atms.positions.tolist()
121 748a6036 Carles Martí
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates)
122 8d08beb4 Carles
            logger.debug(conv_info)
123 21e2cca5 Carles Marti
            return Chem.AddHs(rd_mol_obj)
124 8ab593ee Carles
        elif filetype(coord_file) == 'mol':
125 8d08beb4 Carles
            logger.debug(conv_info)
126 21e2cca5 Carles Marti
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
127 748a6036 Carles Martí
        else:
128 748a6036 Carles Martí
            ase_atms = try_command(ase.io.read,
129 748a6036 Carles Martí
                                   [(ase.io.formats.UnknownFileTypeError,
130 748a6036 Carles Martí
                                     fil_type_err)],
131 748a6036 Carles Martí
                                   coord_file)
132 748a6036 Carles Martí
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
133 748a6036 Carles Martí
            xyz_coordinates = ase_atms.positions.tolist()
134 748a6036 Carles Martí
            return xyz2mol(atomic_nums, xyz_coordinates)
135 8ab593ee Carles
136 8ab593ee Carles
    if requirement == 'ase':
137 cc92f6ee Carles Marti
        add_special_atoms(spec_atms)
138 21e2cca5 Carles Marti
        if filetype(coord_file) == 'xyz':
139 21e2cca5 Carles Marti
            logger.debug(conv_info)
140 21e2cca5 Carles Marti
            return ase.io.read(coord_file)
141 21e2cca5 Carles Marti
        elif filetype(coord_file) == 'mol':
142 21e2cca5 Carles Marti
            logger.debug(conv_info)
143 21e2cca5 Carles Marti
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
144 21e2cca5 Carles Marti
            return rdkit_mol_to_ase_atoms(rd_mol)
145 748a6036 Carles Martí
        else:
146 748a6036 Carles Martí
            return try_command(ase.io.read,
147 748a6036 Carles Martí
                               [(ase.io.formats.UnknownFileTypeError,
148 748a6036 Carles Martí
                                 fil_type_err)],
149 748a6036 Carles Martí
                               coord_file)
150 b6f47f2d Carles
151 b6f47f2d Carles
152 ed01b5b7 Carles Marti
def read_coords_cp2k(file, spec_atoms=tuple()):
153 ed01b5b7 Carles Marti
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
154 ed01b5b7 Carles Marti
     object.
155 ed01b5b7 Carles Marti

156 ed01b5b7 Carles Marti
    @param file: The file to read containing the coordinates.
157 ed01b5b7 Carles Marti
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
158 ed01b5b7 Carles Marti
    @return: ase.Atoms object of the coordinates in the file.
159 ed01b5b7 Carles Marti
    """
160 ed01b5b7 Carles Marti
    import numpy as np
161 ed01b5b7 Carles Marti
    from ase import Atoms
162 ed01b5b7 Carles Marti
    from pycp2k import CP2K
163 ed01b5b7 Carles Marti
164 ed01b5b7 Carles Marti
    cp2k = CP2K()
165 ed01b5b7 Carles Marti
    cp2k.parse(file)
166 ed01b5b7 Carles Marti
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
167 ed01b5b7 Carles Marti
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
168 ed01b5b7 Carles Marti
    symbols = [atom.split()[0] for atom in raw_coords]
169 ed01b5b7 Carles Marti
    positions = np.array([atom.split()[1:] for atom in raw_coords])
170 ed01b5b7 Carles Marti
    if len(spec_atoms) > 0:
171 4387ce69 Carles Marti
        add_special_atoms(spec_atoms)
172 ed01b5b7 Carles Marti
    return Atoms(symbols=symbols, positions=positions)
173 ed01b5b7 Carles Marti
174 ed01b5b7 Carles Marti
175 fd2384fc Carles Marti
def collect_coords(conf_list, code, run_type, spec_atms=tuple()):
176 fd2384fc Carles Marti
    """Directs the reading of coordinates on a set of subdirectories.
177 b6f47f2d Carles

178 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
179 fd2384fc Carles Marti
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
180 fd2384fc Carles Marti
    calculations produced by a given code, stored in every conf_X subdirectory,
181 fd2384fc Carles Marti
    it collects the coordinates of the specified conf_X subdirectories in a
182 fd2384fc Carles Marti
    single run_type by calling the adequate function (depending on the code) and
183 fd2384fc Carles Marti
    returns a list of ase.Atoms objects.
184 b6f47f2d Carles

185 fd2384fc Carles Marti
    @param conf_list: List of directories where to read the coords from.
186 f8c4eafe Carles
    @param code: the code that produced the calculation results files.
187 b6f47f2d Carles
    @param run_type: the type of calculation (and also the name of the folder)
188 b6f47f2d Carles
                     containing the calculation subdirectories.
189 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
190 90819cc3 Carles Marti
        chemical symbols.
191 fd2384fc Carles Marti
    @return: list of ase.Atoms objects.
192 b6f47f2d Carles
    """
193 e7d9c7e8 Carles Marti
    from glob import glob
194 c6e71e46 Carles Marti
    atoms_list = []
195 ed01b5b7 Carles Marti
    for conf in conf_list:
196 ed01b5b7 Carles Marti
        if code == 'cp2k':
197 e1c5f171 Carles Marti
            atoms_list.append(read_coords_cp2k(glob(f"{run_type}/{conf}/*-1"
198 e1c5f171 Carles Marti
                                                    f".restart")[0], spec_atms))
199 fd2384fc Carles Marti
        # elif code == 'vasp'
200 c6e71e46 Carles Marti
    return atoms_list
201 f8c4eafe Carles
202 f8c4eafe Carles
203 ed01b5b7 Carles Marti
def read_energy_cp2k(file):
204 fd2384fc Carles Marti
    """Reads the CP2K out file and returns its final energy.
205 ed01b5b7 Carles Marti

206 ed01b5b7 Carles Marti
    @param file: The file from which the energy should be read.
207 ed01b5b7 Carles Marti
    @return: The last energy on the out file.
208 ed01b5b7 Carles Marti
    """
209 ed01b5b7 Carles Marti
    out_fh = open(file, 'r')
210 ed01b5b7 Carles Marti
    energy = None
211 ed01b5b7 Carles Marti
    for line in out_fh:
212 ed01b5b7 Carles Marti
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
213 9371fd91 Carles Marti
            energy = float(line.strip().split(':')[1]) * 27.2113845  # Ha to eV
214 ed01b5b7 Carles Marti
    out_fh.close()
215 ed01b5b7 Carles Marti
    return energy
216 ed01b5b7 Carles Marti
217 ed01b5b7 Carles Marti
218 fd2384fc Carles Marti
def collect_energies(conf_list, code, run_type):
219 fd2384fc Carles Marti
    """Directs the reading of energies on a set of subdirectories.
220 f8c4eafe Carles

221 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
222 fd2384fc Carles Marti
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
223 fd2384fc Carles Marti
    calculations produced by a given code, stored in every conf_X subdirectory,
224 fd2384fc Carles Marti
    it collects the energies of the specified conf_X subdirectories in a
225 fd2384fc Carles Marti
    single run_type by calling the adequate function (depending on the code) and
226 fd2384fc Carles Marti
    returns a list of energies.
227 f8c4eafe Carles

228 ed01b5b7 Carles Marti
    @param conf_list: List of directories where to read the energy.
229 fd2384fc Carles Marti
    @param code: The code that produced the calculation output files.
230 fd2384fc Carles Marti
    @param run_type: The type of calculation (and also the name of the folder)
231 f8c4eafe Carles
                     containing the calculation subdirectories.
232 fd2384fc Carles Marti
    @return: List of energies
233 f8c4eafe Carles
    """
234 fd2384fc Carles Marti
    from glob import glob
235 f8c4eafe Carles
    import numpy as np
236 f8c4eafe Carles
237 f8c4eafe Carles
    energies = []
238 ed01b5b7 Carles Marti
    for conf in conf_list:
239 ed01b5b7 Carles Marti
        if code == 'cp2k':
240 fd2384fc Carles Marti
            energies.append(read_energy_cp2k(
241 fd2384fc Carles Marti
                glob(f"{run_type}/{conf}/*.out")[0]))
242 ed01b5b7 Carles Marti
243 ed01b5b7 Carles Marti
    if len(energies) == 0:
244 ed01b5b7 Carles Marti
        err = f"No results found on {run_type}"
245 ed01b5b7 Carles Marti
        logger.error(err)
246 ed01b5b7 Carles Marti
        raise FileNotFoundError(err)
247 f8c4eafe Carles
248 f8c4eafe Carles
    return np.array(energies)