Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ e8bebcca

Historique | Voir | Annoter | Télécharger (9,71 ko)

1 fd2384fc Carles Marti
"""Module for the interconversion between different kinds of atomic data.
2 e23f119b Carles

3 e23f119b Carles
functions:
4 fd2384fc Carles Marti
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
5 fd2384fc Carles Marti
    of separate rdkit.Mol objects.
6 fd2384fc Carles Marti
rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
7 fd2384fc Carles Marti
add_special_atoms: Allows ase to use custom elements with symbols not in the
8 fd2384fc Carles Marti
    periodic table.
9 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
10 fd2384fc Carles Marti
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11 fd2384fc Carles Marti
    ase.Atoms object.
12 fd2384fc Carles Marti
collect_coords: Directs the reading of coordinates on a set of subdirectories.
13 fd2384fc Carles Marti
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
14 fd2384fc Carles Marti
collect_energies: Directs the reading of energies on a set of subdirectories.
15 e23f119b Carles
"""
16 e23f119b Carles
17 e23f119b Carles
import logging
18 f3004731 Carles
19 f3004731 Carles
import rdkit.Chem.AllChem as Chem
20 f3004731 Carles
21 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
22 e23f119b Carles
23 e23f119b Carles
24 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
25 f3004731 Carles
    """Converts the conformers inside a rdkit mol object to a list of
26 f3004731 Carles
    separate mol objects.
27 f3004731 Carles

28 f3004731 Carles
    @param mol: rdkit mol object containing at least one conformer.
29 9510666f Carles
    @param idx_lst: list of conformer indices to be considered. If not passed,
30 9510666f Carles
        all conformers are considered.
31 f3004731 Carles
    @return: list of separate mol objects.
32 f3004731 Carles
    """
33 f3004731 Carles
    if idx_lst is None:
34 f3004731 Carles
        idx_lst = list(range(mol.GetNumConformers()))
35 f3004731 Carles
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
36 f3004731 Carles
                                 removeHs=False) for idx in idx_lst]
37 f3004731 Carles
38 f3004731 Carles
39 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
40 f3004731 Carles
    """Converts a rdkit mol object into ase Atoms object.
41 f3004731 Carles
    @param mol: rdkit mol object containing only one conformer.
42 f3004731 Carles
    @return ase.Atoms: ase Atoms object with the same coordinates.
43 f3004731 Carles
    """
44 f3004731 Carles
    from ase import Atoms
45 4933cb8a Carles Martí
    if mol.GetNumConformers() > 1:
46 9510666f Carles
        logger.warning('A mol object with multiple conformers is parsed, '
47 695dcff8 Carles Marti
                       'converting to Atoms only the first conformer.')
48 f3004731 Carles
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
49 f3004731 Carles
    positions = mol.GetConformer(0).GetPositions()
50 f3004731 Carles
    return Atoms(symbols=symbols, positions=positions)
51 f3004731 Carles
52 f3004731 Carles
53 cc92f6ee Carles Marti
def add_special_atoms(symbol_pairs):
54 fd2384fc Carles Marti
    """Allows ase to use custom elements with symbols not in the periodic table.
55 cc92f6ee Carles Marti

56 cc92f6ee Carles Marti
    This function adds new chemical elements to be used by ase. Every new custom
57 cc92f6ee Carles Marti
    element must have a traditional (present in the periodic table) partner
58 cc92f6ee Carles Marti
    from which to obtain all its properties.
59 fd2384fc Carles Marti
    @param symbol_pairs: List of tuples containing pairs of chemical symbols.
60 cc92f6ee Carles Marti
        Every tuple contains a pair of chemical symbols, the first label must be
61 cc92f6ee Carles Marti
        the label of the custom element and the second one the symbol of the
62 cc92f6ee Carles Marti
        reference one (traditional present on the periodic table).
63 cc92f6ee Carles Marti
    @return:
64 cc92f6ee Carles Marti
    """
65 cc92f6ee Carles Marti
    import numpy as np
66 cc92f6ee Carles Marti
    from ase import data
67 cc92f6ee Carles Marti
    for i, pair in enumerate(symbol_pairs):
68 cc92f6ee Carles Marti
        data.chemical_symbols += [pair[0]]
69 cc92f6ee Carles Marti
        z_orig = data.atomic_numbers[pair[1]]
70 cc92f6ee Carles Marti
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
71 cc92f6ee Carles Marti
        orig_com_mass = data.atomic_masses_common[z_orig]
72 cc92f6ee Carles Marti
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
73 cc92f6ee Carles Marti
        data.atomic_names += [pair[0]]
74 cc92f6ee Carles Marti
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
75 cc92f6ee Carles Marti
                                                 orig_iupac_mass)
76 cc92f6ee Carles Marti
        data.atomic_masses = data.atomic_masses_iupac2016
77 cc92f6ee Carles Marti
        data.atomic_masses_common = np.append(data.atomic_masses_common,
78 cc92f6ee Carles Marti
                                              orig_com_mass)
79 cc92f6ee Carles Marti
        data.covalent_radii = np.append(data.covalent_radii,
80 cc92f6ee Carles Marti
                                        data.covalent_radii[z_orig])
81 cc92f6ee Carles Marti
        data.reference_states += [data.reference_states[z_orig]]
82 90819cc3 Carles Marti
        # TODO Add vdw_radii, gsmm and aml (smaller length)
83 cc92f6ee Carles Marti
84 cc92f6ee Carles Marti
85 90819cc3 Carles Marti
def adapt_format(requirement, coord_file, spec_atms=tuple()):
86 e23f119b Carles
    """Converts the coordinate files into a required library object type.
87 e23f119b Carles

88 e23f119b Carles
    Depending on the library required to use and the file type, it converts the
89 e23f119b Carles
    coordinate file into a library-workable object.
90 e23f119b Carles
    @param requirement: str, the library for which the conversion should be
91 e23f119b Carles
    made. Accepted values: 'ase', 'rdkit'.
92 e23f119b Carles
    @param coord_file: str, path to the coordinates file aiming to convert.
93 e23f119b Carles
    Accepted file tyoes: 'xyz', 'mol'.
94 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
95 90819cc3 Carles Marti
        chemical symbols.
96 e23f119b Carles
    @return: an object the required library can work with.
97 e23f119b Carles
    """
98 439ce5f7 Carles
    import ase.io
99 8ab593ee Carles
    from ase.io.formats import filetype
100 8ab593ee Carles
101 8ab593ee Carles
    req_vals = ['rdkit', 'ase']
102 8ab593ee Carles
    file_type_vals = ['xyz', 'mol']
103 4381145e Carles
    lib_err = f"The conversion to the '{requirement}' library object type" \
104 4381145e Carles
              f" has not yet been implemented"
105 4381145e Carles
    conv_info = f"Converted {coord_file} to {requirement} object type"
106 4381145e Carles
107 f3004731 Carles
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
108 4381145e Carles
109 4381145e Carles
    if requirement not in req_vals:
110 9f7bb440 Carles
        logger.error(lib_err)
111 4381145e Carles
        raise NotImplementedError(lib_err)
112 4381145e Carles
113 4381145e Carles
    if filetype(coord_file) not in file_type_vals:
114 9f7bb440 Carles
        logger.error(fil_type_err)
115 4381145e Carles
        raise NotImplementedError(fil_type_err)
116 8ab593ee Carles
117 8ab593ee Carles
    if requirement == 'rdkit':
118 8ab593ee Carles
        if filetype(coord_file) == 'xyz':
119 af3e2441 Carles Marti
            from modules.xyz2mol import xyz2mol
120 439ce5f7 Carles
            ase_atms = ase.io.read(coord_file)
121 439ce5f7 Carles
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
122 439ce5f7 Carles
            xyz_coordinates = ase_atms.positions.tolist()
123 b6f47f2d Carles
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
124 8d08beb4 Carles
            logger.debug(conv_info)
125 21e2cca5 Carles Marti
            return Chem.AddHs(rd_mol_obj)
126 8ab593ee Carles
        elif filetype(coord_file) == 'mol':
127 8d08beb4 Carles
            logger.debug(conv_info)
128 21e2cca5 Carles Marti
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
129 8ab593ee Carles
130 8ab593ee Carles
    if requirement == 'ase':
131 cc92f6ee Carles Marti
        add_special_atoms(spec_atms)
132 21e2cca5 Carles Marti
        if filetype(coord_file) == 'xyz':
133 21e2cca5 Carles Marti
            logger.debug(conv_info)
134 21e2cca5 Carles Marti
            return ase.io.read(coord_file)
135 21e2cca5 Carles Marti
        elif filetype(coord_file) == 'mol':
136 21e2cca5 Carles Marti
            logger.debug(conv_info)
137 21e2cca5 Carles Marti
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
138 21e2cca5 Carles Marti
            return rdkit_mol_to_ase_atoms(rd_mol)
139 b6f47f2d Carles
140 b6f47f2d Carles
141 ed01b5b7 Carles Marti
def read_coords_cp2k(file, spec_atoms=tuple()):
142 ed01b5b7 Carles Marti
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
143 ed01b5b7 Carles Marti
     object.
144 ed01b5b7 Carles Marti

145 ed01b5b7 Carles Marti
    @param file: The file to read containing the coordinates.
146 ed01b5b7 Carles Marti
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
147 ed01b5b7 Carles Marti
    @return: ase.Atoms object of the coordinates in the file.
148 ed01b5b7 Carles Marti
    """
149 ed01b5b7 Carles Marti
    import numpy as np
150 ed01b5b7 Carles Marti
    from ase import Atoms
151 ed01b5b7 Carles Marti
    from pycp2k import CP2K
152 ed01b5b7 Carles Marti
153 ed01b5b7 Carles Marti
    cp2k = CP2K()
154 ed01b5b7 Carles Marti
    cp2k.parse(file)
155 ed01b5b7 Carles Marti
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
156 ed01b5b7 Carles Marti
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
157 ed01b5b7 Carles Marti
    symbols = [atom.split()[0] for atom in raw_coords]
158 ed01b5b7 Carles Marti
    positions = np.array([atom.split()[1:] for atom in raw_coords])
159 ed01b5b7 Carles Marti
    if len(spec_atoms) > 0:
160 ed01b5b7 Carles Marti
        add_special_atoms(spec_atoms)  # TODO check usage
161 ed01b5b7 Carles Marti
    return Atoms(symbols=symbols, positions=positions)
162 ed01b5b7 Carles Marti
163 ed01b5b7 Carles Marti
164 fd2384fc Carles Marti
def collect_coords(conf_list, code, run_type, spec_atms=tuple()):
165 fd2384fc Carles Marti
    """Directs the reading of coordinates on a set of subdirectories.
166 b6f47f2d Carles

167 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
168 fd2384fc Carles Marti
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
169 fd2384fc Carles Marti
    calculations produced by a given code, stored in every conf_X subdirectory,
170 fd2384fc Carles Marti
    it collects the coordinates of the specified conf_X subdirectories in a
171 fd2384fc Carles Marti
    single run_type by calling the adequate function (depending on the code) and
172 fd2384fc Carles Marti
    returns a list of ase.Atoms objects.
173 b6f47f2d Carles

174 fd2384fc Carles Marti
    @param conf_list: List of directories where to read the coords from.
175 f8c4eafe Carles
    @param code: the code that produced the calculation results files.
176 b6f47f2d Carles
    @param run_type: the type of calculation (and also the name of the folder)
177 b6f47f2d Carles
                     containing the calculation subdirectories.
178 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
179 90819cc3 Carles Marti
        chemical symbols.
180 fd2384fc Carles Marti
    @return: list of ase.Atoms objects.
181 b6f47f2d Carles
    """
182 e7d9c7e8 Carles Marti
    from glob import glob
183 c6e71e46 Carles Marti
    atoms_list = []
184 ed01b5b7 Carles Marti
    for conf in conf_list:
185 ed01b5b7 Carles Marti
        if code == 'cp2k':
186 e1c5f171 Carles Marti
            atoms_list.append(read_coords_cp2k(glob(f"{run_type}/{conf}/*-1"
187 e1c5f171 Carles Marti
                                                    f".restart")[0], spec_atms))
188 fd2384fc Carles Marti
        # elif code == 'vasp'
189 c6e71e46 Carles Marti
    return atoms_list
190 f8c4eafe Carles
191 f8c4eafe Carles
192 ed01b5b7 Carles Marti
def read_energy_cp2k(file):
193 fd2384fc Carles Marti
    """Reads the CP2K out file and returns its final energy.
194 ed01b5b7 Carles Marti

195 ed01b5b7 Carles Marti
    @param file: The file from which the energy should be read.
196 ed01b5b7 Carles Marti
    @return: The last energy on the out file.
197 ed01b5b7 Carles Marti
    """
198 ed01b5b7 Carles Marti
    out_fh = open(file, 'r')
199 ed01b5b7 Carles Marti
    energy = None
200 ed01b5b7 Carles Marti
    for line in out_fh:
201 ed01b5b7 Carles Marti
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
202 9371fd91 Carles Marti
            energy = float(line.strip().split(':')[1]) * 27.2113845  # Ha to eV
203 ed01b5b7 Carles Marti
    out_fh.close()
204 ed01b5b7 Carles Marti
    return energy
205 ed01b5b7 Carles Marti
206 ed01b5b7 Carles Marti
207 fd2384fc Carles Marti
def collect_energies(conf_list, code, run_type):
208 fd2384fc Carles Marti
    """Directs the reading of energies on a set of subdirectories.
209 f8c4eafe Carles

210 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
211 fd2384fc Carles Marti
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
212 fd2384fc Carles Marti
    calculations produced by a given code, stored in every conf_X subdirectory,
213 fd2384fc Carles Marti
    it collects the energies of the specified conf_X subdirectories in a
214 fd2384fc Carles Marti
    single run_type by calling the adequate function (depending on the code) and
215 fd2384fc Carles Marti
    returns a list of energies.
216 f8c4eafe Carles

217 ed01b5b7 Carles Marti
    @param conf_list: List of directories where to read the energy.
218 fd2384fc Carles Marti
    @param code: The code that produced the calculation output files.
219 fd2384fc Carles Marti
    @param run_type: The type of calculation (and also the name of the folder)
220 f8c4eafe Carles
                     containing the calculation subdirectories.
221 fd2384fc Carles Marti
    @return: List of energies
222 f8c4eafe Carles
    """
223 fd2384fc Carles Marti
    from glob import glob
224 f8c4eafe Carles
    import numpy as np
225 f8c4eafe Carles
226 f8c4eafe Carles
    energies = []
227 ed01b5b7 Carles Marti
    for conf in conf_list:
228 ed01b5b7 Carles Marti
        if code == 'cp2k':
229 fd2384fc Carles Marti
            energies.append(read_energy_cp2k(
230 fd2384fc Carles Marti
                glob(f"{run_type}/{conf}/*.out")[0]))
231 ed01b5b7 Carles Marti
232 ed01b5b7 Carles Marti
    if len(energies) == 0:
233 ed01b5b7 Carles Marti
        err = f"No results found on {run_type}"
234 ed01b5b7 Carles Marti
        logger.error(err)
235 ed01b5b7 Carles Marti
        raise FileNotFoundError(err)
236 f8c4eafe Carles
237 f8c4eafe Carles
    return np.array(energies)