Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ 78fcb188

Historique | Voir | Annoter | Télécharger (9,76 ko)

1 fd2384fc Carles Marti
"""Module for the interconversion between different kinds of atomic data.
2 e23f119b Carles

3 e23f119b Carles
functions:
4 fd2384fc Carles Marti
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
5 fd2384fc Carles Marti
    of separate rdkit.Mol objects.
6 fd2384fc Carles Marti
rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
7 fd2384fc Carles Marti
add_special_atoms: Allows ase to use custom elements with symbols not in the
8 fd2384fc Carles Marti
    periodic table.
9 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
10 fd2384fc Carles Marti
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11 fd2384fc Carles Marti
    ase.Atoms object.
12 fd2384fc Carles Marti
collect_coords: Directs the reading of coordinates on a set of subdirectories.
13 fd2384fc Carles Marti
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
14 fd2384fc Carles Marti
collect_energies: Directs the reading of energies on a set of subdirectories.
15 e23f119b Carles
"""
16 e23f119b Carles
17 e23f119b Carles
import logging
18 f3004731 Carles
19 f3004731 Carles
import rdkit.Chem.AllChem as Chem
20 f3004731 Carles
21 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
22 e23f119b Carles
23 e23f119b Carles
24 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
25 f3004731 Carles
    """Converts the conformers inside a rdkit mol object to a list of
26 f3004731 Carles
    separate mol objects.
27 f3004731 Carles

28 f3004731 Carles
    @param mol: rdkit mol object containing at least one conformer.
29 9510666f Carles
    @param idx_lst: list of conformer indices to be considered. If not passed,
30 9510666f Carles
        all conformers are considered.
31 f3004731 Carles
    @return: list of separate mol objects.
32 f3004731 Carles
    """
33 f3004731 Carles
    if idx_lst is None:
34 f3004731 Carles
        idx_lst = list(range(mol.GetNumConformers()))
35 f02e613b Carles Marti
    return [Chem.MolFromMolBlock(
36 f02e613b Carles Marti
        Chem.MolToMolBlock(mol, confId=int(idx)).replace("3D", ""),
37 f02e613b Carles Marti
        removeHs=False) for idx in idx_lst]
38 f3004731 Carles
39 f3004731 Carles
40 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
41 f3004731 Carles
    """Converts a rdkit mol object into ase Atoms object.
42 f3004731 Carles
    @param mol: rdkit mol object containing only one conformer.
43 f3004731 Carles
    @return ase.Atoms: ase Atoms object with the same coordinates.
44 f3004731 Carles
    """
45 f3004731 Carles
    from ase import Atoms
46 4933cb8a Carles Martí
    if mol.GetNumConformers() > 1:
47 9510666f Carles
        logger.warning('A mol object with multiple conformers is parsed, '
48 695dcff8 Carles Marti
                       'converting to Atoms only the first conformer.')
49 f3004731 Carles
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
50 f3004731 Carles
    positions = mol.GetConformer(0).GetPositions()
51 f3004731 Carles
    return Atoms(symbols=symbols, positions=positions)
52 f3004731 Carles
53 f3004731 Carles
54 cc92f6ee Carles Marti
def add_special_atoms(symbol_pairs):
55 fd2384fc Carles Marti
    """Allows ase to use custom elements with symbols not in the periodic table.
56 cc92f6ee Carles Marti

57 cc92f6ee Carles Marti
    This function adds new chemical elements to be used by ase. Every new custom
58 cc92f6ee Carles Marti
    element must have a traditional (present in the periodic table) partner
59 cc92f6ee Carles Marti
    from which to obtain all its properties.
60 fd2384fc Carles Marti
    @param symbol_pairs: List of tuples containing pairs of chemical symbols.
61 cc92f6ee Carles Marti
        Every tuple contains a pair of chemical symbols, the first label must be
62 cc92f6ee Carles Marti
        the label of the custom element and the second one the symbol of the
63 cc92f6ee Carles Marti
        reference one (traditional present on the periodic table).
64 cc92f6ee Carles Marti
    @return:
65 587dca22 Carles Marti
    """  # TODO Enable special atoms for rdkit
66 cc92f6ee Carles Marti
    import numpy as np
67 cc92f6ee Carles Marti
    from ase import data
68 cc92f6ee Carles Marti
    for i, pair in enumerate(symbol_pairs):
69 cc92f6ee Carles Marti
        data.chemical_symbols += [pair[0]]
70 cc92f6ee Carles Marti
        z_orig = data.atomic_numbers[pair[1]]
71 cc92f6ee Carles Marti
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
72 cc92f6ee Carles Marti
        orig_com_mass = data.atomic_masses_common[z_orig]
73 cc92f6ee Carles Marti
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
74 cc92f6ee Carles Marti
        data.atomic_names += [pair[0]]
75 cc92f6ee Carles Marti
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
76 cc92f6ee Carles Marti
                                                 orig_iupac_mass)
77 cc92f6ee Carles Marti
        data.atomic_masses = data.atomic_masses_iupac2016
78 cc92f6ee Carles Marti
        data.atomic_masses_common = np.append(data.atomic_masses_common,
79 cc92f6ee Carles Marti
                                              orig_com_mass)
80 cc92f6ee Carles Marti
        data.covalent_radii = np.append(data.covalent_radii,
81 cc92f6ee Carles Marti
                                        data.covalent_radii[z_orig])
82 cc92f6ee Carles Marti
        data.reference_states += [data.reference_states[z_orig]]
83 90819cc3 Carles Marti
        # TODO Add vdw_radii, gsmm and aml (smaller length)
84 cc92f6ee Carles Marti
85 cc92f6ee Carles Marti
86 90819cc3 Carles Marti
def adapt_format(requirement, coord_file, spec_atms=tuple()):
87 e23f119b Carles
    """Converts the coordinate files into a required library object type.
88 e23f119b Carles

89 e23f119b Carles
    Depending on the library required to use and the file type, it converts the
90 e23f119b Carles
    coordinate file into a library-workable object.
91 e23f119b Carles
    @param requirement: str, the library for which the conversion should be
92 e23f119b Carles
    made. Accepted values: 'ase', 'rdkit'.
93 e23f119b Carles
    @param coord_file: str, path to the coordinates file aiming to convert.
94 e23f119b Carles
    Accepted file tyoes: 'xyz', 'mol'.
95 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
96 90819cc3 Carles Marti
        chemical symbols.
97 e23f119b Carles
    @return: an object the required library can work with.
98 587dca22 Carles Marti
    """  # TODO POSCAR/CONTCAR files
99 439ce5f7 Carles
    import ase.io
100 8ab593ee Carles
    from ase.io.formats import filetype
101 8ab593ee Carles
102 8ab593ee Carles
    req_vals = ['rdkit', 'ase']
103 8ab593ee Carles
    file_type_vals = ['xyz', 'mol']
104 4381145e Carles
    lib_err = f"The conversion to the '{requirement}' library object type" \
105 4381145e Carles
              f" has not yet been implemented"
106 4381145e Carles
    conv_info = f"Converted {coord_file} to {requirement} object type"
107 4381145e Carles
108 f3004731 Carles
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
109 4381145e Carles
110 4381145e Carles
    if requirement not in req_vals:
111 9f7bb440 Carles
        logger.error(lib_err)
112 4381145e Carles
        raise NotImplementedError(lib_err)
113 4381145e Carles
114 4381145e Carles
    if filetype(coord_file) not in file_type_vals:
115 9f7bb440 Carles
        logger.error(fil_type_err)
116 4381145e Carles
        raise NotImplementedError(fil_type_err)
117 8ab593ee Carles
118 8ab593ee Carles
    if requirement == 'rdkit':
119 8ab593ee Carles
        if filetype(coord_file) == 'xyz':
120 af3e2441 Carles Marti
            from modules.xyz2mol import xyz2mol
121 439ce5f7 Carles
            ase_atms = ase.io.read(coord_file)
122 439ce5f7 Carles
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
123 439ce5f7 Carles
            xyz_coordinates = ase_atms.positions.tolist()
124 b6f47f2d Carles
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
125 8d08beb4 Carles
            logger.debug(conv_info)
126 21e2cca5 Carles Marti
            return Chem.AddHs(rd_mol_obj)
127 8ab593ee Carles
        elif filetype(coord_file) == 'mol':
128 8d08beb4 Carles
            logger.debug(conv_info)
129 21e2cca5 Carles Marti
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
130 8ab593ee Carles
131 8ab593ee Carles
    if requirement == 'ase':
132 cc92f6ee Carles Marti
        add_special_atoms(spec_atms)
133 21e2cca5 Carles Marti
        if filetype(coord_file) == 'xyz':
134 21e2cca5 Carles Marti
            logger.debug(conv_info)
135 21e2cca5 Carles Marti
            return ase.io.read(coord_file)
136 21e2cca5 Carles Marti
        elif filetype(coord_file) == 'mol':
137 21e2cca5 Carles Marti
            logger.debug(conv_info)
138 21e2cca5 Carles Marti
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
139 21e2cca5 Carles Marti
            return rdkit_mol_to_ase_atoms(rd_mol)
140 b6f47f2d Carles
141 b6f47f2d Carles
142 ed01b5b7 Carles Marti
def read_coords_cp2k(file, spec_atoms=tuple()):
143 ed01b5b7 Carles Marti
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
144 ed01b5b7 Carles Marti
     object.
145 ed01b5b7 Carles Marti

146 ed01b5b7 Carles Marti
    @param file: The file to read containing the coordinates.
147 ed01b5b7 Carles Marti
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
148 ed01b5b7 Carles Marti
    @return: ase.Atoms object of the coordinates in the file.
149 ed01b5b7 Carles Marti
    """
150 ed01b5b7 Carles Marti
    import numpy as np
151 ed01b5b7 Carles Marti
    from ase import Atoms
152 ed01b5b7 Carles Marti
    from pycp2k import CP2K
153 ed01b5b7 Carles Marti
154 ed01b5b7 Carles Marti
    cp2k = CP2K()
155 ed01b5b7 Carles Marti
    cp2k.parse(file)
156 ed01b5b7 Carles Marti
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
157 ed01b5b7 Carles Marti
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
158 ed01b5b7 Carles Marti
    symbols = [atom.split()[0] for atom in raw_coords]
159 ed01b5b7 Carles Marti
    positions = np.array([atom.split()[1:] for atom in raw_coords])
160 ed01b5b7 Carles Marti
    if len(spec_atoms) > 0:
161 4387ce69 Carles Marti
        add_special_atoms(spec_atoms)
162 ed01b5b7 Carles Marti
    return Atoms(symbols=symbols, positions=positions)
163 ed01b5b7 Carles Marti
164 ed01b5b7 Carles Marti
165 fd2384fc Carles Marti
def collect_coords(conf_list, code, run_type, spec_atms=tuple()):
166 fd2384fc Carles Marti
    """Directs the reading of coordinates on a set of subdirectories.
167 b6f47f2d Carles

168 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
169 fd2384fc Carles Marti
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
170 fd2384fc Carles Marti
    calculations produced by a given code, stored in every conf_X subdirectory,
171 fd2384fc Carles Marti
    it collects the coordinates of the specified conf_X subdirectories in a
172 fd2384fc Carles Marti
    single run_type by calling the adequate function (depending on the code) and
173 fd2384fc Carles Marti
    returns a list of ase.Atoms objects.
174 b6f47f2d Carles

175 fd2384fc Carles Marti
    @param conf_list: List of directories where to read the coords from.
176 f8c4eafe Carles
    @param code: the code that produced the calculation results files.
177 b6f47f2d Carles
    @param run_type: the type of calculation (and also the name of the folder)
178 b6f47f2d Carles
                     containing the calculation subdirectories.
179 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
180 90819cc3 Carles Marti
        chemical symbols.
181 fd2384fc Carles Marti
    @return: list of ase.Atoms objects.
182 b6f47f2d Carles
    """
183 e7d9c7e8 Carles Marti
    from glob import glob
184 c6e71e46 Carles Marti
    atoms_list = []
185 ed01b5b7 Carles Marti
    for conf in conf_list:
186 ed01b5b7 Carles Marti
        if code == 'cp2k':
187 e1c5f171 Carles Marti
            atoms_list.append(read_coords_cp2k(glob(f"{run_type}/{conf}/*-1"
188 e1c5f171 Carles Marti
                                                    f".restart")[0], spec_atms))
189 fd2384fc Carles Marti
        # elif code == 'vasp'
190 c6e71e46 Carles Marti
    return atoms_list
191 f8c4eafe Carles
192 f8c4eafe Carles
193 ed01b5b7 Carles Marti
def read_energy_cp2k(file):
194 fd2384fc Carles Marti
    """Reads the CP2K out file and returns its final energy.
195 ed01b5b7 Carles Marti

196 ed01b5b7 Carles Marti
    @param file: The file from which the energy should be read.
197 ed01b5b7 Carles Marti
    @return: The last energy on the out file.
198 ed01b5b7 Carles Marti
    """
199 ed01b5b7 Carles Marti
    out_fh = open(file, 'r')
200 ed01b5b7 Carles Marti
    energy = None
201 ed01b5b7 Carles Marti
    for line in out_fh:
202 ed01b5b7 Carles Marti
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
203 9371fd91 Carles Marti
            energy = float(line.strip().split(':')[1]) * 27.2113845  # Ha to eV
204 ed01b5b7 Carles Marti
    out_fh.close()
205 ed01b5b7 Carles Marti
    return energy
206 ed01b5b7 Carles Marti
207 ed01b5b7 Carles Marti
208 fd2384fc Carles Marti
def collect_energies(conf_list, code, run_type):
209 fd2384fc Carles Marti
    """Directs the reading of energies on a set of subdirectories.
210 f8c4eafe Carles

211 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
212 fd2384fc Carles Marti
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
213 fd2384fc Carles Marti
    calculations produced by a given code, stored in every conf_X subdirectory,
214 fd2384fc Carles Marti
    it collects the energies of the specified conf_X subdirectories in a
215 fd2384fc Carles Marti
    single run_type by calling the adequate function (depending on the code) and
216 fd2384fc Carles Marti
    returns a list of energies.
217 f8c4eafe Carles

218 ed01b5b7 Carles Marti
    @param conf_list: List of directories where to read the energy.
219 fd2384fc Carles Marti
    @param code: The code that produced the calculation output files.
220 fd2384fc Carles Marti
    @param run_type: The type of calculation (and also the name of the folder)
221 f8c4eafe Carles
                     containing the calculation subdirectories.
222 fd2384fc Carles Marti
    @return: List of energies
223 f8c4eafe Carles
    """
224 fd2384fc Carles Marti
    from glob import glob
225 f8c4eafe Carles
    import numpy as np
226 f8c4eafe Carles
227 f8c4eafe Carles
    energies = []
228 ed01b5b7 Carles Marti
    for conf in conf_list:
229 ed01b5b7 Carles Marti
        if code == 'cp2k':
230 fd2384fc Carles Marti
            energies.append(read_energy_cp2k(
231 fd2384fc Carles Marti
                glob(f"{run_type}/{conf}/*.out")[0]))
232 ed01b5b7 Carles Marti
233 ed01b5b7 Carles Marti
    if len(energies) == 0:
234 ed01b5b7 Carles Marti
        err = f"No results found on {run_type}"
235 ed01b5b7 Carles Marti
        logger.error(err)
236 ed01b5b7 Carles Marti
        raise FileNotFoundError(err)
237 f8c4eafe Carles
238 f8c4eafe Carles
    return np.array(energies)