Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ f8c4eafe

Historique | Voir | Annoter | Télécharger (5,97 ko)

1 b6f47f2d Carles
"""Module for the conversion between atomic coordinates files and objects
2 e23f119b Carles

3 e23f119b Carles
functions:
4 f3004731 Carles
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5 f3004731 Carles
    of separate mol objects.
6 f3004731 Carles
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
8 b6f47f2d Carles
read_coords: Reads the atomic coordinates resulting from finished calculations.
read_energies: Reads the energies resulting from finished calculations.
9 e23f119b Carles
"""
10 e23f119b Carles
11 e23f119b Carles
import logging
12 f3004731 Carles
13 f3004731 Carles
import rdkit.Chem.AllChem as Chem
14 f3004731 Carles
15 89a980fc Carles
# Module-level logger named 'DockOnSurf'; the functions below use it to emit
# their warning, error and debug messages.
logger = logging.getLogger('DockOnSurf')
16 e23f119b Carles
17 e23f119b Carles
18 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
    """Splits the conformers of a rdkit mol object into separate mol objects.

    Every selected conformer is written out as a MOL block and parsed back
    (keeping the hydrogen atoms) to build an independent mol object.

    @param mol: rdkit mol object containing at least one conformer.
    @param idx_lst: list of conformer indices to be considered. If not passed,
        all conformers are considered.
    @return: list of separate mol objects, in the same order as idx_lst.
    """
    if idx_lst is None:
        idx_lst = list(range(mol.GetNumConformers()))

    mol_list = []
    for conf_idx in idx_lst:
        # int() allows indices coming as numpy integers or similar types.
        mol_block = Chem.MolToMolBlock(mol, confId=int(conf_idx))
        mol_list.append(Chem.MolFromMolBlock(mol_block, removeHs=False))
    return mol_list
31 f3004731 Carles
32 f3004731 Carles
33 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
    """Builds an ase Atoms object out of a rdkit mol object.

    Only the conformer with id 0 is converted. If the mol object holds more
    than one conformer a warning is logged and the remaining ones are ignored.

    @param mol: rdkit mol object containing only one conformer.
    @return ase.Atoms: ase Atoms object with the same chemical symbols and
        coordinates.
    """
    from ase import Atoms

    n_confs = mol.GetNumConformers()
    if n_confs > 1:
        logger.warning('A mol object with multiple conformers is parsed, '
                       'converting to Atoms only the first conformer')

    elements = [atom.GetSymbol() for atom in mol.GetAtoms()]
    coordinates = mol.GetConformer(0).GetPositions()
    return Atoms(symbols=elements, positions=coordinates)
45 f3004731 Carles
46 f3004731 Carles
47 83f022c9 Carles
def adapt_format(requirement, coord_file):
    """Converts the coordinate files into a required library object type.

    Depending on the library required to use and the file type, it converts the
    coordinate file into a library-workable object.

    @param requirement: str, the library for which the conversion should be
        made. Accepted values: 'ase', 'rdkit'.
    @param coord_file: str, path to the coordinates file aiming to convert.
        Accepted file types: 'xyz', 'mol'.
    @return: an object the required library can work with.
    @raise NotImplementedError: if the required library or the detected file
        format is not among the accepted values.
    """
    import ase.io
    from ase.io.formats import filetype

    req_vals = ('rdkit', 'ase')
    file_type_vals = ('xyz', 'mol')

    # Validate the cheap argument first so that an unsupported library is
    # reported without having to inspect the coordinates file at all.
    if requirement not in req_vals:
        lib_err = f"The conversion to the '{requirement}' library object type" \
                  f" has not yet been implemented"
        logger.error(lib_err)
        raise NotImplementedError(lib_err)

    # Detect the file format only once and reuse the result for the checks
    # below, instead of re-running the detection for every comparison.
    file_type = filetype(coord_file)
    if file_type not in file_type_vals:
        fil_type_err = f'The {file_type} file format is not supported'
        logger.error(fil_type_err)
        raise NotImplementedError(fil_type_err)

    conv_info = f"Converted {coord_file} to {requirement} object type"

    if requirement == 'rdkit':
        if file_type == 'xyz':
            from xyz2mol import xyz2mol
            ase_atms = ase.io.read(coord_file)
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
            xyz_coordinates = ase_atms.positions.tolist()
            # TODO Add routine to read charge
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
        else:  # file_type == 'mol', guaranteed by the validation above.
            from rdkit.Chem import MolFromMolFile
            rd_mol_obj = MolFromMolFile(coord_file, removeHs=False)
        # Log only after the conversion call has returned.
        logger.debug(conv_info)
        return rd_mol_obj

    # requirement == 'ase', guaranteed by the validation above.
    ase_atoms_obj = ase.io.read(coord_file)
    logger.debug(conv_info)
    return ase_atoms_obj
95 b6f47f2d Carles
96 b6f47f2d Carles
97 b6f47f2d Carles
def read_coords(code, run_type, req):
    """Reads the atomic coordinates resulting from finished calculations.

    Walks over every subdirectory of the run_type directory ('isolated',
    'screening' or 'refinement') and converts each matching file found in
    them into an object of the required library by means of adapt_format.
    For 'cp2k' only the files whose name contains '-pos-1.xyz' are
    considered; for any other code the pattern is empty, so every file inside
    the subdirectories is converted.

    @param code: the code that produced the calculation results files.
    @param run_type: the type of calculation (and also the name of the folder)
                     containing the calculation subdirectories.
    @param req: The required library object type to make the list of (eg. rdkit,
                ase)
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
    """
    import os

    pattern = '-pos-1.xyz' if code == 'cp2k' else ''

    coords_list = []
    for conf in os.listdir(run_type):
        for fil in os.listdir(f"{run_type}/{conf}"):
            if pattern in fil:
                coords_list.append(
                    adapt_format(req, f'{run_type}/{conf}/{fil}'))
    return coords_list
121 f8c4eafe Carles
122 f8c4eafe Carles
123 f8c4eafe Carles
def read_energies(code, run_type):
    """Reads the energies resulting from finished calculations.

    Given a run_type ('isolated', 'screening' or 'refinement') directory
    containing different subdirectories with finished calculations in every
    subdirectory, it reads the final energies of calculations inside each
    subdirectory.

    Only the 'cp2k' code is handled: for every file whose name contains
    '-pos-1.xyz' the number of atoms is read from the first line, the last
    (number of atoms + 2) lines are retrieved and the value following 'E ='
    is stored. For any other code an empty array is returned.

    @param code: the code that produced the calculation results files.
    @param run_type: the type of calculation (and also the name of the folder)
                     containing the calculation subdirectories.
    @return: numpy array with the energies found, in directory listing order.
    """
    import os
    import numpy as np

    energies = []
    if code == 'cp2k':
        # Project-local helper, only needed to parse cp2k trajectories.
        from utilities import tail
        pattern = '-pos-1.xyz'
        for conf in os.listdir(run_type):
            for fil in os.listdir(f"{run_type}/{conf}"):
                if pattern not in fil:
                    continue
                # The context manager guarantees the trajectory file is closed
                # even if parsing fails; previously the handle was leaked.
                with open(f"{run_type}/{conf}/{fil}", 'rb') as traj_fh:
                    num_atoms = int(traj_fh.readline().strip())
                    # NOTE(review): tail is presumably returning decoded text
                    # (str); confirm against utilities.tail.
                    last_geo = tail(traj_fh, num_atoms + 2).splitlines()
                for line in last_geo:
                    if 'E =' in line:
                        energies.append(float(line.split('E =')[1]))

    return np.array(energies)