Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ c6e71e46

Historique | Voir | Annoter | Télécharger (8,4 ko)

1 b6f47f2d Carles
"""Module for the conversion between atomic coordinates files and objects
2 e23f119b Carles

3 e23f119b Carles
functions:
4 f3004731 Carles
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5 f3004731 Carles
    of separate mol objects.
6 f3004731 Carles
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
8 b6f47f2d Carles
read_coords: Reads the atomic coordinates resulting from finished calculations.
9 e23f119b Carles
"""
10 e23f119b Carles
11 e23f119b Carles
import logging
12 f3004731 Carles
13 f3004731 Carles
import rdkit.Chem.AllChem as Chem
14 f3004731 Carles
15 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
16 e23f119b Carles
17 e23f119b Carles
18 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
19 f3004731 Carles
    """Converts the conformers inside a rdkit mol object to a list of
20 f3004731 Carles
    separate mol objects.
21 f3004731 Carles

22 f3004731 Carles
    @param mol: rdkit mol object containing at least one conformer.
23 9510666f Carles
    @param idx_lst: list of conformer indices to be considered. If not passed,
24 9510666f Carles
        all conformers are considered.
25 f3004731 Carles
    @return: list of separate mol objects.
26 f3004731 Carles
    """
27 f3004731 Carles
    if idx_lst is None:
28 f3004731 Carles
        idx_lst = list(range(mol.GetNumConformers()))
29 f3004731 Carles
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
30 f3004731 Carles
                                 removeHs=False) for idx in idx_lst]
31 f3004731 Carles
32 f3004731 Carles
33 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
34 f3004731 Carles
    """Converts a rdkit mol object into ase Atoms object.
35 f3004731 Carles
    @param mol: rdkit mol object containing only one conformer.
36 f3004731 Carles
    @return ase.Atoms: ase Atoms object with the same coordinates.
37 f3004731 Carles
    """
38 f3004731 Carles
    from ase import Atoms
39 4933cb8a Carles Martí
    if mol.GetNumConformers() > 1:
40 9510666f Carles
        logger.warning('A mol object with multiple conformers is parsed, '
41 695dcff8 Carles Marti
                       'converting to Atoms only the first conformer.')
42 f3004731 Carles
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
43 f3004731 Carles
    positions = mol.GetConformer(0).GetPositions()
44 f3004731 Carles
    return Atoms(symbols=symbols, positions=positions)
45 f3004731 Carles
46 f3004731 Carles
47 cc92f6ee Carles Marti
def add_special_atoms(symbol_pairs):
48 cc92f6ee Carles Marti
    """Allows to use custom elements with symbols not in the periodic table.
49 cc92f6ee Carles Marti

50 cc92f6ee Carles Marti
    This function adds new chemical elements to be used by ase. Every new custom
51 cc92f6ee Carles Marti
    element must have a traditional (present in the periodic table) partner
52 cc92f6ee Carles Marti
    from which to obtain all its properties.
53 cc92f6ee Carles Marti
    @param symbol_pairs: List of tuples containing the pairs of chemical symbols.
54 cc92f6ee Carles Marti
        Every tuple contains a pair of chemical symbols, the first label must be
55 cc92f6ee Carles Marti
        the label of the custom element and the second one the symbol of the
56 cc92f6ee Carles Marti
        reference one (traditional present on the periodic table).
57 cc92f6ee Carles Marti
    @return:
58 cc92f6ee Carles Marti
    """
59 cc92f6ee Carles Marti
    import numpy as np
60 cc92f6ee Carles Marti
    from ase import data
61 cc92f6ee Carles Marti
    for i, pair in enumerate(symbol_pairs):
62 cc92f6ee Carles Marti
        data.chemical_symbols += [pair[0]]
63 cc92f6ee Carles Marti
        z_orig = data.atomic_numbers[pair[1]]
64 cc92f6ee Carles Marti
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
65 cc92f6ee Carles Marti
        orig_com_mass = data.atomic_masses_common[z_orig]
66 cc92f6ee Carles Marti
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
67 cc92f6ee Carles Marti
        data.atomic_names += [pair[0]]
68 cc92f6ee Carles Marti
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
69 cc92f6ee Carles Marti
                                                 orig_iupac_mass)
70 cc92f6ee Carles Marti
        data.atomic_masses = data.atomic_masses_iupac2016
71 cc92f6ee Carles Marti
        data.atomic_masses_common = np.append(data.atomic_masses_common,
72 cc92f6ee Carles Marti
                                              orig_com_mass)
73 cc92f6ee Carles Marti
        data.covalent_radii = np.append(data.covalent_radii,
74 cc92f6ee Carles Marti
                                        data.covalent_radii[z_orig])
75 cc92f6ee Carles Marti
        data.reference_states += [data.reference_states[z_orig]]
76 90819cc3 Carles Marti
        # TODO Add vdw_radii, gsmm and aml (smaller length)
77 cc92f6ee Carles Marti
78 cc92f6ee Carles Marti
79 90819cc3 Carles Marti
def adapt_format(requirement, coord_file, spec_atms=tuple()):
80 e23f119b Carles
    """Converts the coordinate files into a required library object type.
81 e23f119b Carles

82 e23f119b Carles
    Depending on the library required to use and the file type, it converts the
83 e23f119b Carles
    coordinate file into a library-workable object.
84 e23f119b Carles
    @param requirement: str, the library for which the conversion should be
85 e23f119b Carles
    made. Accepted values: 'ase', 'rdkit'.
86 e23f119b Carles
    @param coord_file: str, path to the coordinates file aiming to convert.
87 e23f119b Carles
    Accepted file tyoes: 'xyz', 'mol'.
88 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
89 90819cc3 Carles Marti
        chemical symbols.
90 e23f119b Carles
    @return: an object the required library can work with.
91 e23f119b Carles
    """
92 439ce5f7 Carles
    import ase.io
93 8ab593ee Carles
    from ase.io.formats import filetype
94 8ab593ee Carles
95 8ab593ee Carles
    req_vals = ['rdkit', 'ase']
96 8ab593ee Carles
    file_type_vals = ['xyz', 'mol']
97 4381145e Carles
    lib_err = f"The conversion to the '{requirement}' library object type" \
98 4381145e Carles
              f" has not yet been implemented"
99 4381145e Carles
    conv_info = f"Converted {coord_file} to {requirement} object type"
100 4381145e Carles
101 f3004731 Carles
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
102 4381145e Carles
103 4381145e Carles
    if requirement not in req_vals:
104 9f7bb440 Carles
        logger.error(lib_err)
105 4381145e Carles
        raise NotImplementedError(lib_err)
106 4381145e Carles
107 4381145e Carles
    if filetype(coord_file) not in file_type_vals:
108 9f7bb440 Carles
        logger.error(fil_type_err)
109 4381145e Carles
        raise NotImplementedError(fil_type_err)
110 8ab593ee Carles
111 8ab593ee Carles
    if requirement == 'rdkit':
112 8ab593ee Carles
        if filetype(coord_file) == 'xyz':
113 af3e2441 Carles Marti
            from modules.xyz2mol import xyz2mol
114 439ce5f7 Carles
            ase_atms = ase.io.read(coord_file)
115 439ce5f7 Carles
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
116 439ce5f7 Carles
            xyz_coordinates = ase_atms.positions.tolist()
117 b6f47f2d Carles
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
118 8d08beb4 Carles
            logger.debug(conv_info)
119 21e2cca5 Carles Marti
            return Chem.AddHs(rd_mol_obj)
120 8ab593ee Carles
        elif filetype(coord_file) == 'mol':
121 8d08beb4 Carles
            logger.debug(conv_info)
122 21e2cca5 Carles Marti
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
123 8ab593ee Carles
124 8ab593ee Carles
    if requirement == 'ase':
125 cc92f6ee Carles Marti
        add_special_atoms(spec_atms)
126 21e2cca5 Carles Marti
        if filetype(coord_file) == 'xyz':
127 21e2cca5 Carles Marti
            logger.debug(conv_info)
128 21e2cca5 Carles Marti
            return ase.io.read(coord_file)
129 21e2cca5 Carles Marti
        elif filetype(coord_file) == 'mol':
130 21e2cca5 Carles Marti
            logger.debug(conv_info)
131 21e2cca5 Carles Marti
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
132 21e2cca5 Carles Marti
            return rdkit_mol_to_ase_atoms(rd_mol)
133 b6f47f2d Carles
134 b6f47f2d Carles
135 90819cc3 Carles Marti
def read_coords(code, run_type, req, spec_atms=tuple()):
136 b6f47f2d Carles
    """Reads the atomic coordinates resulting from finished calculations.
137 b6f47f2d Carles

138 b6f47f2d Carles
    Given a run_type ('isolated', 'screening' or 'refinement') directory
139 b6f47f2d Carles
    containing different subdirectories with finished calculations in every
140 f8c4eafe Carles
    subdirectory, it reads, from each subirectory, the final coordinates
141 f8c4eafe Carles
    resulting from the calculation and returns a list of objects adequate to the
142 f8c4eafe Carles
    required library.
143 b6f47f2d Carles

144 f8c4eafe Carles
    @param code: the code that produced the calculation results files.
145 b6f47f2d Carles
    @param run_type: the type of calculation (and also the name of the folder)
146 b6f47f2d Carles
                     containing the calculation subdirectories.
147 f8c4eafe Carles
    @param req: The required library object type to make the list of (eg. rdkit,
148 f8c4eafe Carles
                ase)
149 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
150 90819cc3 Carles Marti
        chemical symbols.
151 b6f47f2d Carles
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
152 b6f47f2d Carles
    """
153 b6f47f2d Carles
    import os
154 c6e71e46 Carles Marti
    # Relate file-name patterns to codes
155 b6f47f2d Carles
    if code == 'cp2k':
156 b6f47f2d Carles
        pattern = '-pos-1.xyz'
157 b6f47f2d Carles
    else:
158 b6f47f2d Carles
        pattern = ''
159 c6e71e46 Carles Marti
160 c6e71e46 Carles Marti
    # Read appropriate files and transform them to adequate object
161 c6e71e46 Carles Marti
    atoms_list = []
162 c6e71e46 Carles Marti
    for conf in os.listdir(run_type):
163 c6e71e46 Carles Marti
        if not os.path.isdir(f'{run_type}/{conf}') or 'conf_' not in conf:
164 c6e71e46 Carles Marti
            continue
165 c6e71e46 Carles Marti
        for fil in os.listdir(f"{run_type}/{conf}"):
166 c6e71e46 Carles Marti
            if pattern not in fil:
167 c6e71e46 Carles Marti
                continue
168 c6e71e46 Carles Marti
            atoms_list.append(adapt_format(req, f'{run_type}/{conf}/{fil}',
169 c6e71e46 Carles Marti
                                           spec_atms))
170 c6e71e46 Carles Marti
    return atoms_list
171 f8c4eafe Carles
172 f8c4eafe Carles
173 f8c4eafe Carles
def read_energies(code, run_type):
174 f8c4eafe Carles
    """Reads the energies resulting from finished calculations.
175 f8c4eafe Carles

176 f8c4eafe Carles
    Given a run_type ('isolated', 'screening' or 'refinement') directory
177 f8c4eafe Carles
    containing different subdirectories with finished calculations in every
178 f8c4eafe Carles
    subdirectory, it reads the final energies of calculations inside each
179 f8c4eafe Carles
    subdirectory.
180 f8c4eafe Carles

181 f8c4eafe Carles
    @param code: the code that produced the calculation results files.
182 f8c4eafe Carles
    @param run_type: the type of calculation (and also the name of the folder)
183 f8c4eafe Carles
                     containing the calculation subdirectories.
184 f8c4eafe Carles
    @return: list of energies
185 f8c4eafe Carles
    """
186 f8c4eafe Carles
    import os
187 f8c4eafe Carles
    import numpy as np
188 af3e2441 Carles Marti
    from modules.utilities import tail
189 f8c4eafe Carles
190 f8c4eafe Carles
    energies = []
191 f8c4eafe Carles
    if code == 'cp2k':
192 f8c4eafe Carles
        pattern = '-pos-1.xyz'
193 f8c4eafe Carles
        for conf in os.listdir(run_type):
194 f8c4eafe Carles
            for fil in os.listdir(f"{run_type}/{conf}"):
195 f8c4eafe Carles
                if pattern in fil:
196 f8c4eafe Carles
                    traj_fh = open(f"{run_type}/{conf}/{fil}", 'rb')
197 f8c4eafe Carles
                    num_atoms = int(traj_fh.readline().strip())
198 f8c4eafe Carles
                    last_geo = tail(traj_fh, num_atoms + 2).splitlines()
199 f8c4eafe Carles
                    for line in last_geo:
200 f8c4eafe Carles
                        if 'E =' in line:
201 f8c4eafe Carles
                            energies.append(float(line.split('E =')[1]))
202 f8c4eafe Carles
203 f8c4eafe Carles
    return np.array(energies)