dockonsurf / modules / formats.py @ 0db30d07
Historique | Voir | Annoter | Télécharger (3,53 ko)
1 |
"""Module for the conversion of coordinate files to library-workable objects
|
---|---|
2 |
|
3 |
functions:
|
4 |
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
|
5 |
of separate mol objects.
|
6 |
rdkit_mol_to_ase_atoms: Converts an rdkit mol object into an ase Atoms object.
|
7 |
adapt_format: Converts the coordinate files into a required library object type
|
8 |
"""
|
9 |
|
10 |
import logging |
11 |
|
12 |
import rdkit.Chem.AllChem as Chem |
13 |
|
14 |
# Module-level logger; messages from this module are emitted under the
# 'DockOnSurf' logger name.
logger = logging.getLogger('DockOnSurf')
|
15 |
|
16 |
|
17 |
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
    """Splits the conformers of a rdkit mol object into separate mol objects.

    Every selected conformer is written to a mol block and parsed back into
    a new mol object, keeping the hydrogen atoms (removeHs=False).

    @param mol: rdkit mol object containing at least one conformer.
    @param idx_lst: conformer indices to extract. If not passed (None), all
        the conformers of mol are extracted.
    @return: list of mol objects, one per requested conformer index, in the
        same order as the indices.
    """
    # Fall back to every conformer index when no explicit selection is given.
    conf_ids = range(mol.GetNumConformers()) if idx_lst is None else idx_lst

    separated_mols = []
    for conf_id in conf_ids:
        mol_block = Chem.MolToMolBlock(mol, confId=int(conf_id))
        separated_mols.append(Chem.MolFromMolBlock(mol_block, removeHs=False))
    return separated_mols
30 |
|
31 |
|
32 |
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
    """Builds an ase Atoms object out of a rdkit mol object.

    The element symbols are taken from the atoms of mol and the coordinates
    from its conformer with id 0. If mol holds more than one conformer a
    warning is logged and the remaining conformers are ignored.

    @param mol: rdkit mol object, expected to contain only one conformer.
    @return ase.Atoms: ase Atoms object with the same symbols and coordinates.
    """
    from ase import Atoms

    n_conformers = mol.GetNumConformers()
    if n_conformers > 1:
        logger.warning('A mol object with multiple conformers is parsed, '
                       'converting to Atoms only the first conformer')

    element_symbols = []
    for atom in mol.GetAtoms():
        element_symbols.append(atom.GetSymbol())
    conformer = mol.GetConformer(0)
    coordinates = conformer.GetPositions()
    return Atoms(symbols=element_symbols, positions=coordinates)
|
44 |
|
45 |
|
46 |
def adapt_format(requirement, coord_file):
    """Converts the coordinate files into a required library object type.

    Depending on the library required to use and the file type, it converts the
    coordinate file into a library-workable object.

    @param requirement: str, the library for which the conversion should be
        made. Accepted values: 'ase', 'rdkit'.
    @param coord_file: str, path to the coordinates file aiming to convert.
        Accepted file types: 'xyz', 'mol'.
    @return: an object the required library can work with: for 'ase' the
        result of ase.io.read, for 'rdkit' the result of xyz2mol ('xyz' files)
        or of rdkit's MolFromMolFile ('mol' files).
        NOTE(review): MolFromMolFile presumably returns None when the file
        cannot be parsed; callers may want to check for it -- confirm.
    @raise NotImplementedError: if requirement is not one of the accepted
        libraries or the detected file type is not one of the accepted types.
    """
    import ase.io
    from ase.io.formats import filetype

    req_vals = ['rdkit', 'ase']
    file_type_vals = ['xyz', 'mol']

    # Detect the file format a single time and reuse the result below instead
    # of calling filetype() again for every check. It is done before
    # validating the requirement so that errors raised by the detection itself
    # keep the same precedence they had before.
    file_type = filetype(coord_file)

    if requirement not in req_vals:
        lib_err = f"The conversion to the '{requirement}' library object " \
                  f"type has not yet been implemented"
        logger.error(lib_err)
        raise NotImplementedError(lib_err)

    if file_type not in file_type_vals:
        fil_type_err = f'The {file_type} file format is not supported'
        logger.error(fil_type_err)
        raise NotImplementedError(fil_type_err)

    conv_info = f"Converted {coord_file} to {requirement} object type"

    if requirement == 'rdkit':
        if file_type == 'xyz':
            from xyz2mol import xyz2mol
            ase_atms = ase.io.read(coord_file)
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
            xyz_coordinates = ase_atms.positions.tolist()
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates,
                                 charge=0)  # TODO Charge
            logger.debug(conv_info)
            return rd_mol_obj
        # The file type was validated above, so here it can only be 'mol'.
        from rdkit.Chem import MolFromMolFile
        logger.debug(conv_info)
        return MolFromMolFile(coord_file, removeHs=False)

    # The requirement was validated above, so here it can only be 'ase'.
    logger.debug(conv_info)
    return ase.io.read(coord_file)
|