Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / xyz2mol.py @ 1297d18b

Historique | Voir | Annoter | Télécharger (21,02 ko)

1 8ab593ee Carles
"""
2 8ab593ee Carles
Module for generating rdkit molobj/smiles/molecular graph from free atoms
3 8ab593ee Carles

4 8ab593ee Carles
Implementation by Jan H. Jensen, based on the paper
5 8ab593ee Carles

6 8ab593ee Carles
    Yeonjoon Kim and Woo Youn Kim
7 8ab593ee Carles
    "Universal Structure Conversion Method for Organic Molecules: From Atomic Connectivity
8 8ab593ee Carles
    to Three-Dimensional Geometry"
9 8ab593ee Carles
    Bull. Korean Chem. Soc. 2015, Vol. 36, 1769-1777
10 8ab593ee Carles
    DOI: 10.1002/bkcs.10334
11 8ab593ee Carles

12 8ab593ee Carles
"""
13 8ab593ee Carles
14 8ab593ee Carles
import copy
15 8ab593ee Carles
import itertools
16 8ab593ee Carles
17 8ab593ee Carles
from rdkit.Chem import rdmolops
18 8ab593ee Carles
try:
19 8ab593ee Carles
    from rdkit.Chem import rdEHTTools #requires RDKit 2019.9.1 or later
20 8ab593ee Carles
except ImportError:
21 8ab593ee Carles
    rdEHTTools = None
22 8ab593ee Carles
23 8ab593ee Carles
from collections import defaultdict
24 8ab593ee Carles
25 8ab593ee Carles
import numpy as np
26 8ab593ee Carles
import networkx as nx
27 8ab593ee Carles
28 8ab593ee Carles
from rdkit import Chem
29 8ab593ee Carles
from rdkit.Chem import AllChem, rdmolops
30 8ab593ee Carles
31 8ab593ee Carles
global __ATOM_LIST__
32 8ab593ee Carles
__ATOM_LIST__ = \
33 8ab593ee Carles
    ['h',  'he',
34 8ab593ee Carles
     'li', 'be', 'b',  'c',  'n',  'o',  'f',  'ne',
35 8ab593ee Carles
     'na', 'mg', 'al', 'si', 'p',  's',  'cl', 'ar',
36 8ab593ee Carles
     'k',  'ca', 'sc', 'ti', 'v ', 'cr', 'mn', 'fe', 'co', 'ni', 'cu',
37 8ab593ee Carles
     'zn', 'ga', 'ge', 'as', 'se', 'br', 'kr',
38 8ab593ee Carles
     'rb', 'sr', 'y',  'zr', 'nb', 'mo', 'tc', 'ru', 'rh', 'pd', 'ag',
39 8ab593ee Carles
     'cd', 'in', 'sn', 'sb', 'te', 'i',  'xe',
40 8ab593ee Carles
     'cs', 'ba', 'la', 'ce', 'pr', 'nd', 'pm', 'sm', 'eu', 'gd', 'tb', 'dy',
41 8ab593ee Carles
     'ho', 'er', 'tm', 'yb', 'lu', 'hf', 'ta', 'w',  're', 'os', 'ir', 'pt',
42 8ab593ee Carles
     'au', 'hg', 'tl', 'pb', 'bi', 'po', 'at', 'rn',
43 8ab593ee Carles
     'fr', 'ra', 'ac', 'th', 'pa', 'u',  'np', 'pu']
44 8ab593ee Carles
45 8ab593ee Carles
46 8ab593ee Carles
global atomic_valence
47 8ab593ee Carles
global atomic_valence_electrons
48 8ab593ee Carles
49 8ab593ee Carles
atomic_valence = defaultdict(list)
50 8ab593ee Carles
atomic_valence[1] = [1]
51 8ab593ee Carles
atomic_valence[6] = [4]
52 8ab593ee Carles
atomic_valence[7] = [3,4]
53 8ab593ee Carles
atomic_valence[8] = [2,1]
54 8ab593ee Carles
atomic_valence[9] = [1]
55 8ab593ee Carles
atomic_valence[14] = [4]
56 8ab593ee Carles
atomic_valence[15] = [5,3] #[5,4,3]
57 8ab593ee Carles
atomic_valence[16] = [6,3,2] #[6,4,2]
58 8ab593ee Carles
atomic_valence[17] = [1]
59 8ab593ee Carles
atomic_valence[32] = [4]
60 8ab593ee Carles
atomic_valence[35] = [1]
61 8ab593ee Carles
atomic_valence[53] = [1]
62 8ab593ee Carles
63 8ab593ee Carles
atomic_valence_electrons = {}
64 8ab593ee Carles
atomic_valence_electrons[1] = 1
65 8ab593ee Carles
atomic_valence_electrons[6] = 4
66 8ab593ee Carles
atomic_valence_electrons[7] = 5
67 8ab593ee Carles
atomic_valence_electrons[8] = 6
68 8ab593ee Carles
atomic_valence_electrons[9] = 7
69 8ab593ee Carles
atomic_valence_electrons[14] = 4
70 8ab593ee Carles
atomic_valence_electrons[15] = 5
71 8ab593ee Carles
atomic_valence_electrons[16] = 6
72 8ab593ee Carles
atomic_valence_electrons[17] = 7
73 8ab593ee Carles
atomic_valence_electrons[32] = 4
74 8ab593ee Carles
atomic_valence_electrons[35] = 7
75 8ab593ee Carles
atomic_valence_electrons[53] = 7
76 8ab593ee Carles
77 8ab593ee Carles
78 8ab593ee Carles
def str_atom(atom):
79 8ab593ee Carles
    """
80 8ab593ee Carles
    convert integer atom to string atom
81 8ab593ee Carles
    """
82 8ab593ee Carles
    global __ATOM_LIST__
83 8ab593ee Carles
    atom = __ATOM_LIST__[atom - 1]
84 8ab593ee Carles
    return atom
85 8ab593ee Carles
86 8ab593ee Carles
87 8ab593ee Carles
def int_atom(atom):
88 8ab593ee Carles
    """
89 8ab593ee Carles
    convert str atom to integer atom
90 8ab593ee Carles
    """
91 8ab593ee Carles
    global __ATOM_LIST__
92 b5163003 Carles
    # print(atom)
93 8ab593ee Carles
    atom = atom.lower()
94 8ab593ee Carles
    return __ATOM_LIST__.index(atom) + 1
95 8ab593ee Carles
96 8ab593ee Carles
97 8ab593ee Carles
def get_UA(maxValence_list, valence_list):
98 8ab593ee Carles
    """
99 8ab593ee Carles
    """
100 8ab593ee Carles
    UA = []
101 8ab593ee Carles
    DU = []
102 8ab593ee Carles
    for i, (maxValence, valence) in enumerate(zip(maxValence_list, valence_list)):
103 8ab593ee Carles
        if not maxValence - valence > 0:
104 8ab593ee Carles
            continue
105 8ab593ee Carles
        UA.append(i)
106 8ab593ee Carles
        DU.append(maxValence - valence)
107 8ab593ee Carles
    return UA, DU
108 8ab593ee Carles
109 8ab593ee Carles
110 8ab593ee Carles
def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True):
111 8ab593ee Carles
    """
112 8ab593ee Carles
    """
113 8ab593ee Carles
    BO = AC.copy()
114 8ab593ee Carles
    DU_save = []
115 8ab593ee Carles
116 8ab593ee Carles
    while DU_save != DU:
117 8ab593ee Carles
        for i, j in UA_pairs:
118 8ab593ee Carles
            BO[i, j] += 1
119 8ab593ee Carles
            BO[j, i] += 1
120 8ab593ee Carles
121 8ab593ee Carles
        BO_valence = list(BO.sum(axis=1))
122 8ab593ee Carles
        DU_save = copy.copy(DU)
123 8ab593ee Carles
        UA, DU = get_UA(valences, BO_valence)
124 8ab593ee Carles
        UA_pairs = get_UA_pairs(UA, AC, use_graph=use_graph)[0]
125 8ab593ee Carles
126 8ab593ee Carles
    return BO
127 8ab593ee Carles
128 8ab593ee Carles
129 8ab593ee Carles
def valences_not_too_large(BO, valences):
130 8ab593ee Carles
    """
131 8ab593ee Carles
    """
132 8ab593ee Carles
    number_of_bonds_list = BO.sum(axis=1)
133 8ab593ee Carles
    for valence, number_of_bonds in zip(valences, number_of_bonds_list):
134 8ab593ee Carles
        if number_of_bonds > valence:
135 8ab593ee Carles
            return False
136 8ab593ee Carles
137 8ab593ee Carles
    return True
138 8ab593ee Carles
139 8ab593ee Carles
140 8ab593ee Carles
def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valances,
141 8ab593ee Carles
    allow_charged_fragments=True):
142 8ab593ee Carles
    """
143 8ab593ee Carles
    Sanity of bond-orders
144 8ab593ee Carles

145 8ab593ee Carles
    args:
146 8ab593ee Carles
        BO -
147 8ab593ee Carles
        AC -
148 8ab593ee Carles
        charge -
149 8ab593ee Carles
        DU - 
150 8ab593ee Carles

151 8ab593ee Carles

152 8ab593ee Carles
    optional
153 8ab593ee Carles
        allow_charges_fragments - 
154 8ab593ee Carles

155 8ab593ee Carles

156 8ab593ee Carles
    returns:
157 8ab593ee Carles
        boolean - true of molecule is OK, false if not
158 8ab593ee Carles
    """
159 8ab593ee Carles
160 8ab593ee Carles
    if not valences_not_too_large(BO, valances):
161 8ab593ee Carles
        return False
162 8ab593ee Carles
163 8ab593ee Carles
    # total charge
164 8ab593ee Carles
    Q = 0
165 8ab593ee Carles
166 8ab593ee Carles
    # charge fragment list
167 8ab593ee Carles
    q_list = []
168 8ab593ee Carles
169 8ab593ee Carles
    if allow_charged_fragments:
170 8ab593ee Carles
171 8ab593ee Carles
        BO_valences = list(BO.sum(axis=1))
172 8ab593ee Carles
        for i, atom in enumerate(atoms):
173 8ab593ee Carles
            q = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])
174 8ab593ee Carles
            Q += q
175 8ab593ee Carles
            if atom == 6:
176 8ab593ee Carles
                number_of_single_bonds_to_C = list(BO[i, :]).count(1)
177 8ab593ee Carles
                if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
178 8ab593ee Carles
                    Q += 1
179 8ab593ee Carles
                    q = 2
180 8ab593ee Carles
                if number_of_single_bonds_to_C == 3 and Q + 1 < charge:
181 8ab593ee Carles
                    Q += 2
182 8ab593ee Carles
                    q = 1
183 8ab593ee Carles
184 8ab593ee Carles
            if q != 0:
185 8ab593ee Carles
                q_list.append(q)
186 8ab593ee Carles
187 8ab593ee Carles
    check_sum = (BO - AC).sum() == sum(DU)
188 8ab593ee Carles
    check_charge = charge == Q
189 8ab593ee Carles
    # check_len = len(q_list) <= abs(charge)
190 8ab593ee Carles
191 8ab593ee Carles
    if check_sum and check_charge:
192 8ab593ee Carles
        return True
193 8ab593ee Carles
194 8ab593ee Carles
    return False
195 8ab593ee Carles
196 8ab593ee Carles
197 8ab593ee Carles
def get_atomic_charge(atom, atomic_valence_electrons, BO_valence):
198 8ab593ee Carles
    """
199 8ab593ee Carles
    """
200 8ab593ee Carles
201 8ab593ee Carles
    if atom == 1:
202 8ab593ee Carles
        charge = 1 - BO_valence
203 8ab593ee Carles
    elif atom == 5:
204 8ab593ee Carles
        charge = 3 - BO_valence
205 8ab593ee Carles
    elif atom == 15 and BO_valence == 5:
206 8ab593ee Carles
        charge = 0
207 8ab593ee Carles
    elif atom == 16 and BO_valence == 6:
208 8ab593ee Carles
        charge = 0
209 8ab593ee Carles
    else:
210 8ab593ee Carles
        charge = atomic_valence_electrons - 8 + BO_valence
211 8ab593ee Carles
212 8ab593ee Carles
    return charge
213 8ab593ee Carles
214 8ab593ee Carles
215 8ab593ee Carles
def clean_charges(mol):
216 8ab593ee Carles
    """
217 8ab593ee Carles
    This hack should not be needed anymore, but is kept just in case
218 8ab593ee Carles

219 8ab593ee Carles
    """
220 8ab593ee Carles
221 8ab593ee Carles
    Chem.SanitizeMol(mol)
222 8ab593ee Carles
    #rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
223 8ab593ee Carles
    #              '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
224 8ab593ee Carles
    #              '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
225 8ab593ee Carles
    #              '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
226 8ab593ee Carles
    #              '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
227 8ab593ee Carles
    #              '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']
228 8ab593ee Carles
229 8ab593ee Carles
    rxn_smarts = ['[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][CX3-,NX3-:5][#6,#7:6]1=[#6,#7:7]>>'
230 8ab593ee Carles
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][-0,-0:5]=[#6,#7:6]1[#6-,#7-:7]',
231 8ab593ee Carles
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3](=[#6,#7:4])[#6,#7:5]=[#6,#7:6][CX3-,NX3-:7]1>>'
232 8ab593ee Carles
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3]([#6-,#7-:4])=[#6,#7:5][#6,#7:6]=[-0,-0:7]1']
233 8ab593ee Carles
234 8ab593ee Carles
    fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False)
235 8ab593ee Carles
236 8ab593ee Carles
    for i, fragment in enumerate(fragments):
237 8ab593ee Carles
        for smarts in rxn_smarts:
238 8ab593ee Carles
            patt = Chem.MolFromSmarts(smarts.split(">>")[0])
239 8ab593ee Carles
            while fragment.HasSubstructMatch(patt):
240 8ab593ee Carles
                rxn = AllChem.ReactionFromSmarts(smarts)
241 8ab593ee Carles
                ps = rxn.RunReactants((fragment,))
242 8ab593ee Carles
                fragment = ps[0][0]
243 8ab593ee Carles
                Chem.SanitizeMol(fragment)
244 8ab593ee Carles
        if i == 0:
245 8ab593ee Carles
            mol = fragment
246 8ab593ee Carles
        else:
247 8ab593ee Carles
            mol = Chem.CombineMols(mol, fragment)
248 8ab593ee Carles
249 8ab593ee Carles
    return mol
250 8ab593ee Carles
251 8ab593ee Carles
252 8ab593ee Carles
def BO2mol(mol, BO_matrix, atoms, atomic_valence_electrons,
253 8ab593ee Carles
           mol_charge, allow_charged_fragments=True):
254 8ab593ee Carles
    """
255 8ab593ee Carles
    based on code written by Paolo Toscani
256 8ab593ee Carles

257 8ab593ee Carles
    From bond order, atoms, valence structure and total charge, generate an
258 8ab593ee Carles
    rdkit molecule.
259 8ab593ee Carles

260 8ab593ee Carles
    args:
261 8ab593ee Carles
        mol - rdkit molecule
262 8ab593ee Carles
        BO_matrix - bond order matrix of molecule
263 8ab593ee Carles
        atoms - list of integer atomic symbols
264 8ab593ee Carles
        atomic_valence_electrons -
265 8ab593ee Carles
        mol_charge - total charge of molecule
266 8ab593ee Carles

267 8ab593ee Carles
    optional:
268 8ab593ee Carles
        allow_charged_fragments - bool - allow charged fragments
269 8ab593ee Carles

270 8ab593ee Carles
    returns
271 8ab593ee Carles
        mol - updated rdkit molecule with bond connectivity
272 8ab593ee Carles

273 8ab593ee Carles
    """
274 8ab593ee Carles
275 8ab593ee Carles
    l = len(BO_matrix)
276 8ab593ee Carles
    l2 = len(atoms)
277 8ab593ee Carles
    BO_valences = list(BO_matrix.sum(axis=1))
278 8ab593ee Carles
279 8ab593ee Carles
    if (l != l2):
280 8ab593ee Carles
        raise RuntimeError('sizes of adjMat ({0:d}) and Atoms {1:d} differ'.format(l, l2))
281 8ab593ee Carles
282 8ab593ee Carles
    rwMol = Chem.RWMol(mol)
283 8ab593ee Carles
284 8ab593ee Carles
    bondTypeDict = {
285 8ab593ee Carles
        1: Chem.BondType.SINGLE,
286 8ab593ee Carles
        2: Chem.BondType.DOUBLE,
287 8ab593ee Carles
        3: Chem.BondType.TRIPLE
288 8ab593ee Carles
    }
289 8ab593ee Carles
290 8ab593ee Carles
    for i in range(l):
291 8ab593ee Carles
        for j in range(i + 1, l):
292 8ab593ee Carles
            bo = int(round(BO_matrix[i, j]))
293 8ab593ee Carles
            if (bo == 0):
294 8ab593ee Carles
                continue
295 8ab593ee Carles
            bt = bondTypeDict.get(bo, Chem.BondType.SINGLE)
296 8ab593ee Carles
            rwMol.AddBond(i, j, bt)
297 8ab593ee Carles
298 8ab593ee Carles
    mol = rwMol.GetMol()
299 8ab593ee Carles
300 8ab593ee Carles
    if allow_charged_fragments:
301 8ab593ee Carles
        mol = set_atomic_charges(
302 8ab593ee Carles
            mol,
303 8ab593ee Carles
            atoms,
304 8ab593ee Carles
            atomic_valence_electrons,
305 8ab593ee Carles
            BO_valences,
306 8ab593ee Carles
            BO_matrix,
307 8ab593ee Carles
            mol_charge)
308 8ab593ee Carles
    else:
309 8ab593ee Carles
        mol = set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences)
310 8ab593ee Carles
311 8ab593ee Carles
    return mol
312 8ab593ee Carles
313 8ab593ee Carles
314 8ab593ee Carles
def set_atomic_charges(mol, atoms, atomic_valence_electrons,
315 8ab593ee Carles
                       BO_valences, BO_matrix, mol_charge):
316 8ab593ee Carles
    """
317 8ab593ee Carles
    """
318 8ab593ee Carles
    q = 0
319 8ab593ee Carles
    for i, atom in enumerate(atoms):
320 8ab593ee Carles
        a = mol.GetAtomWithIdx(i)
321 8ab593ee Carles
        charge = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])
322 8ab593ee Carles
        q += charge
323 8ab593ee Carles
        if atom == 6:
324 8ab593ee Carles
            number_of_single_bonds_to_C = list(BO_matrix[i, :]).count(1)
325 8ab593ee Carles
            if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
326 8ab593ee Carles
                q += 1
327 8ab593ee Carles
                charge = 0
328 8ab593ee Carles
            if number_of_single_bonds_to_C == 3 and q + 1 < mol_charge:
329 8ab593ee Carles
                q += 2
330 8ab593ee Carles
                charge = 1
331 8ab593ee Carles
332 8ab593ee Carles
        if (abs(charge) > 0):
333 8ab593ee Carles
            a.SetFormalCharge(int(charge))
334 8ab593ee Carles
335 8ab593ee Carles
    mol = clean_charges(mol)
336 8ab593ee Carles
337 8ab593ee Carles
    return mol
338 8ab593ee Carles
339 8ab593ee Carles
340 8ab593ee Carles
def set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences):
341 8ab593ee Carles
    """
342 8ab593ee Carles

343 8ab593ee Carles
    The number of radical electrons = absolute atomic charge
344 8ab593ee Carles

345 8ab593ee Carles
    """
346 8ab593ee Carles
    for i, atom in enumerate(atoms):
347 8ab593ee Carles
        a = mol.GetAtomWithIdx(i)
348 8ab593ee Carles
        charge = get_atomic_charge(
349 8ab593ee Carles
            atom,
350 8ab593ee Carles
            atomic_valence_electrons[atom],
351 8ab593ee Carles
            BO_valences[i])
352 8ab593ee Carles
353 8ab593ee Carles
        if (abs(charge) > 0):
354 8ab593ee Carles
            a.SetNumRadicalElectrons(abs(int(charge)))
355 8ab593ee Carles
356 8ab593ee Carles
    return mol
357 8ab593ee Carles
358 8ab593ee Carles
359 8ab593ee Carles
def get_bonds(UA, AC):
360 8ab593ee Carles
    """
361 8ab593ee Carles

362 8ab593ee Carles
    """
363 8ab593ee Carles
    bonds = []
364 8ab593ee Carles
365 8ab593ee Carles
    for k, i in enumerate(UA):
366 8ab593ee Carles
        for j in UA[k + 1:]:
367 8ab593ee Carles
            if AC[i, j] == 1:
368 8ab593ee Carles
                bonds.append(tuple(sorted([i, j])))
369 8ab593ee Carles
370 8ab593ee Carles
    return bonds
371 8ab593ee Carles
372 8ab593ee Carles
373 8ab593ee Carles
def get_UA_pairs(UA, AC, use_graph=True):
374 8ab593ee Carles
    """
375 8ab593ee Carles

376 8ab593ee Carles
    """
377 8ab593ee Carles
378 8ab593ee Carles
    bonds = get_bonds(UA, AC)
379 8ab593ee Carles
380 8ab593ee Carles
    if len(bonds) == 0:
381 8ab593ee Carles
        return [()]
382 8ab593ee Carles
383 8ab593ee Carles
    if use_graph:
384 8ab593ee Carles
        G = nx.Graph()
385 8ab593ee Carles
        G.add_edges_from(bonds)
386 8ab593ee Carles
        UA_pairs = [list(nx.max_weight_matching(G))]
387 8ab593ee Carles
        return UA_pairs
388 8ab593ee Carles
389 8ab593ee Carles
    max_atoms_in_combo = 0
390 8ab593ee Carles
    UA_pairs = [()]
391 8ab593ee Carles
    for combo in list(itertools.combinations(bonds, int(len(UA) / 2))):
392 8ab593ee Carles
        flat_list = [item for sublist in combo for item in sublist]
393 8ab593ee Carles
        atoms_in_combo = len(set(flat_list))
394 8ab593ee Carles
        if atoms_in_combo > max_atoms_in_combo:
395 8ab593ee Carles
            max_atoms_in_combo = atoms_in_combo
396 8ab593ee Carles
            UA_pairs = [combo]
397 8ab593ee Carles
398 8ab593ee Carles
        elif atoms_in_combo == max_atoms_in_combo:
399 8ab593ee Carles
            UA_pairs.append(combo)
400 8ab593ee Carles
401 8ab593ee Carles
    return UA_pairs
402 8ab593ee Carles
403 8ab593ee Carles
404 8ab593ee Carles
def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
405 8ab593ee Carles
    """
406 8ab593ee Carles

407 8ab593ee Carles
    implemenation of algorithm shown in Figure 2
408 8ab593ee Carles

409 8ab593ee Carles
    UA: unsaturated atoms
410 8ab593ee Carles

411 8ab593ee Carles
    DU: degree of unsaturation (u matrix in Figure)
412 8ab593ee Carles

413 8ab593ee Carles
    best_BO: Bcurr in Figure
414 8ab593ee Carles

415 8ab593ee Carles
    """
416 8ab593ee Carles
417 8ab593ee Carles
    global atomic_valence
418 8ab593ee Carles
    global atomic_valence_electrons
419 8ab593ee Carles
420 8ab593ee Carles
    # make a list of valences, e.g. for CO: [[4],[2,1]]
421 8ab593ee Carles
    valences_list_of_lists = []
422 8ab593ee Carles
    AC_valence = list(AC.sum(axis=1))
423 8ab593ee Carles
    for atomicNum in atoms:
424 8ab593ee Carles
        valences_list_of_lists.append(atomic_valence[atomicNum])
425 8ab593ee Carles
426 8ab593ee Carles
    # convert [[4],[2,1]] to [[4,2],[4,1]]
427 8ab593ee Carles
    valences_list = itertools.product(*valences_list_of_lists)
428 8ab593ee Carles
429 8ab593ee Carles
    best_BO = AC.copy()
430 8ab593ee Carles
431 8ab593ee Carles
    for valences in valences_list:
432 8ab593ee Carles
433 8ab593ee Carles
        UA, DU_from_AC = get_UA(valences, AC_valence)
434 8ab593ee Carles
435 8ab593ee Carles
        check_len = (len(UA) == 0)
436 8ab593ee Carles
        if check_len:
437 8ab593ee Carles
            check_bo = BO_is_OK(AC, AC, charge, DU_from_AC,
438 8ab593ee Carles
                atomic_valence_electrons, atoms, valences,
439 8ab593ee Carles
                allow_charged_fragments=allow_charged_fragments)
440 8ab593ee Carles
        else:
441 8ab593ee Carles
            check_bo = None
442 8ab593ee Carles
443 8ab593ee Carles
        if check_len and check_bo:
444 8ab593ee Carles
            return AC, atomic_valence_electrons
445 8ab593ee Carles
446 8ab593ee Carles
        UA_pairs_list = get_UA_pairs(UA, AC, use_graph=use_graph)
447 8ab593ee Carles
        for UA_pairs in UA_pairs_list:
448 8ab593ee Carles
            BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph)
449 8ab593ee Carles
            status = BO_is_OK(BO, AC, charge, DU_from_AC,
450 8ab593ee Carles
                        atomic_valence_electrons, atoms, valences,
451 8ab593ee Carles
                        allow_charged_fragments=allow_charged_fragments)
452 8ab593ee Carles
453 8ab593ee Carles
            if status:
454 8ab593ee Carles
                return BO, atomic_valence_electrons
455 8ab593ee Carles
456 8ab593ee Carles
            elif BO.sum() >= best_BO.sum() and valences_not_too_large(BO, valences):
457 8ab593ee Carles
                best_BO = BO.copy()
458 8ab593ee Carles
459 8ab593ee Carles
    return best_BO, atomic_valence_electrons
460 8ab593ee Carles
461 8ab593ee Carles
462 8ab593ee Carles
def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
463 8ab593ee Carles
    """
464 8ab593ee Carles
    """
465 8ab593ee Carles
466 8ab593ee Carles
    # convert AC matrix to bond order (BO) matrix
467 8ab593ee Carles
    BO, atomic_valence_electrons = AC2BO(
468 8ab593ee Carles
        AC,
469 8ab593ee Carles
        atoms,
470 8ab593ee Carles
        charge,
471 8ab593ee Carles
        allow_charged_fragments=allow_charged_fragments,
472 8ab593ee Carles
        use_graph=use_graph)
473 8ab593ee Carles
474 8ab593ee Carles
    # add BO connectivity and charge info to mol object
475 8ab593ee Carles
    mol = BO2mol(
476 8ab593ee Carles
        mol,
477 8ab593ee Carles
        BO,
478 8ab593ee Carles
        atoms,
479 8ab593ee Carles
        atomic_valence_electrons,
480 8ab593ee Carles
        charge,
481 8ab593ee Carles
        allow_charged_fragments=allow_charged_fragments)
482 8ab593ee Carles
483 8ab593ee Carles
    return mol
484 8ab593ee Carles
485 8ab593ee Carles
486 8ab593ee Carles
def get_proto_mol(atoms):
487 8ab593ee Carles
    """
488 8ab593ee Carles
    """
489 8ab593ee Carles
    mol = Chem.MolFromSmarts("[#" + str(atoms[0]) + "]")
490 8ab593ee Carles
    rwMol = Chem.RWMol(mol)
491 8ab593ee Carles
    for i in range(1, len(atoms)):
492 8ab593ee Carles
        a = Chem.Atom(atoms[i])
493 8ab593ee Carles
        rwMol.AddAtom(a)
494 8ab593ee Carles
495 8ab593ee Carles
    mol = rwMol.GetMol()
496 8ab593ee Carles
497 8ab593ee Carles
    return mol
498 8ab593ee Carles
499 8ab593ee Carles
500 8ab593ee Carles
def read_xyz_file(filename, look_for_charge=True):
501 8ab593ee Carles
    """
502 8ab593ee Carles
    """
503 8ab593ee Carles
504 8ab593ee Carles
    atomic_symbols = []
505 8ab593ee Carles
    xyz_coordinates = []
506 8ab593ee Carles
    charge = 0
507 8ab593ee Carles
    title = ""
508 8ab593ee Carles
509 8ab593ee Carles
    with open(filename, "r") as file:
510 8ab593ee Carles
        for line_number, line in enumerate(file):
511 8ab593ee Carles
            if line_number == 0:
512 8ab593ee Carles
                num_atoms = int(line)
513 8ab593ee Carles
            elif line_number == 1:
514 8ab593ee Carles
                title = line
515 58ede1f9 Carles Martí
                if "charge=" in line:  # TODO: More flexible charge declaration
516 8ab593ee Carles
                    charge = int(line.split("=")[1])
517 8ab593ee Carles
            else:
518 8ab593ee Carles
                atomic_symbol, x, y, z = line.split()
519 8ab593ee Carles
                atomic_symbols.append(atomic_symbol)
520 8ab593ee Carles
                xyz_coordinates.append([float(x), float(y), float(z)])
521 8ab593ee Carles
522 8ab593ee Carles
    atoms = [int_atom(atom) for atom in atomic_symbols]
523 8ab593ee Carles
524 8ab593ee Carles
    return atoms, charge, xyz_coordinates
525 8ab593ee Carles
526 8ab593ee Carles
527 8ab593ee Carles
def xyz2AC(atoms, xyz, charge, use_huckel=False):
528 8ab593ee Carles
    """
529 8ab593ee Carles

530 8ab593ee Carles
    atoms and coordinates to atom connectivity (AC)
531 8ab593ee Carles

532 8ab593ee Carles
    args:
533 8ab593ee Carles
        atoms - int atom types
534 8ab593ee Carles
        xyz - coordinates
535 8ab593ee Carles
        charge - molecule charge
536 8ab593ee Carles

537 8ab593ee Carles
    optional:
538 8ab593ee Carles
        use_huckel - Use Huckel method for atom connecitivty
539 8ab593ee Carles

540 8ab593ee Carles
    returns
541 8ab593ee Carles
        ac - atom connectivity matrix
542 8ab593ee Carles
        mol - rdkit molecule
543 8ab593ee Carles

544 8ab593ee Carles
    """
545 8ab593ee Carles
546 8ab593ee Carles
    if use_huckel:
547 8ab593ee Carles
        return xyz2AC_huckel(atoms, xyz, charge)
548 8ab593ee Carles
    else:
549 8ab593ee Carles
        return xyz2AC_vdW(atoms, xyz)
550 8ab593ee Carles
551 8ab593ee Carles
552 8ab593ee Carles
def xyz2AC_vdW(atoms, xyz):
553 8ab593ee Carles
554 8ab593ee Carles
    # Get mol template
555 8ab593ee Carles
    mol = get_proto_mol(atoms)
556 8ab593ee Carles
557 8ab593ee Carles
    # Set coordinates
558 8ab593ee Carles
    conf = Chem.Conformer(mol.GetNumAtoms())
559 8ab593ee Carles
    for i in range(mol.GetNumAtoms()):
560 8ab593ee Carles
        conf.SetAtomPosition(i, (xyz[i][0], xyz[i][1], xyz[i][2]))
561 8ab593ee Carles
    mol.AddConformer(conf)
562 8ab593ee Carles
563 8ab593ee Carles
    AC = get_AC(mol)
564 8ab593ee Carles
565 8ab593ee Carles
    return AC, mol
566 8ab593ee Carles
567 8ab593ee Carles
568 8ab593ee Carles
def get_AC(mol, covalent_factor=1.3):
569 8ab593ee Carles
    """
570 8ab593ee Carles

571 8ab593ee Carles
    Generate adjacent matrix from atoms and coordinates.
572 8ab593ee Carles

573 8ab593ee Carles
    AC is a (num_atoms, num_atoms) matrix with 1 being covalent bond and 0 is not
574 8ab593ee Carles

575 8ab593ee Carles

576 8ab593ee Carles
    covalent_factor - 1.3 is an arbitrary factor
577 8ab593ee Carles

578 8ab593ee Carles
    args:
579 8ab593ee Carles
        mol - rdkit molobj with 3D conformer
580 8ab593ee Carles

581 8ab593ee Carles
    optional
582 8ab593ee Carles
        covalent_factor - increase covalent bond length threshold with facto
583 8ab593ee Carles

584 8ab593ee Carles
    returns:
585 8ab593ee Carles
        AC - adjacent matrix
586 8ab593ee Carles

587 8ab593ee Carles
    """
588 8ab593ee Carles
589 8ab593ee Carles
    # Calculate distance matrix
590 8ab593ee Carles
    dMat = Chem.Get3DDistanceMatrix(mol)
591 8ab593ee Carles
592 8ab593ee Carles
    pt = Chem.GetPeriodicTable()
593 8ab593ee Carles
    num_atoms = mol.GetNumAtoms()
594 8ab593ee Carles
    AC = np.zeros((num_atoms, num_atoms), dtype=int)
595 8ab593ee Carles
596 8ab593ee Carles
    for i in range(num_atoms):
597 8ab593ee Carles
        a_i = mol.GetAtomWithIdx(i)
598 8ab593ee Carles
        Rcov_i = pt.GetRcovalent(a_i.GetAtomicNum()) * covalent_factor
599 8ab593ee Carles
        for j in range(i + 1, num_atoms):
600 8ab593ee Carles
            a_j = mol.GetAtomWithIdx(j)
601 8ab593ee Carles
            Rcov_j = pt.GetRcovalent(a_j.GetAtomicNum()) * covalent_factor
602 8ab593ee Carles
            if dMat[i, j] <= Rcov_i + Rcov_j:
603 8ab593ee Carles
                AC[i, j] = 1
604 8ab593ee Carles
                AC[j, i] = 1
605 8ab593ee Carles
606 8ab593ee Carles
    return AC
607 8ab593ee Carles
608 8ab593ee Carles
609 8ab593ee Carles
def xyz2AC_huckel(atomicNumList,xyz,charge):
610 8ab593ee Carles
    """
611 8ab593ee Carles

612 8ab593ee Carles
    args
613 8ab593ee Carles
        atomicNumList - atom type list
614 8ab593ee Carles
        xyz - coordinates
615 8ab593ee Carles
        charge - molecule charge
616 8ab593ee Carles

617 8ab593ee Carles
    returns
618 8ab593ee Carles
        ac - atom connectivity
619 8ab593ee Carles
        mol - rdkit molecule
620 8ab593ee Carles

621 8ab593ee Carles
    """
622 8ab593ee Carles
    mol = get_proto_mol(atomicNumList)
623 8ab593ee Carles
624 8ab593ee Carles
    conf = Chem.Conformer(mol.GetNumAtoms())
625 8ab593ee Carles
    for i in range(mol.GetNumAtoms()):
626 8ab593ee Carles
        conf.SetAtomPosition(i,(xyz[i][0],xyz[i][1],xyz[i][2]))
627 8ab593ee Carles
    mol.AddConformer(conf)
628 8ab593ee Carles
629 8ab593ee Carles
    num_atoms = len(atomicNumList)
630 8ab593ee Carles
    AC = np.zeros((num_atoms,num_atoms)).astype(int)
631 8ab593ee Carles
632 8ab593ee Carles
    mol_huckel = Chem.Mol(mol)
633 8ab593ee Carles
    mol_huckel.GetAtomWithIdx(0).SetFormalCharge(charge) #mol charge arbitrarily added to 1st atom    
634 8ab593ee Carles
635 8ab593ee Carles
    passed,result = rdEHTTools.RunMol(mol_huckel)
636 8ab593ee Carles
    opop = result.GetReducedOverlapPopulationMatrix()
637 8ab593ee Carles
    tri = np.zeros((num_atoms, num_atoms))
638 8ab593ee Carles
    tri[np.tril(np.ones((num_atoms, num_atoms), dtype=bool))] = opop #lower triangular to square matrix
639 8ab593ee Carles
    for i in range(num_atoms):
640 8ab593ee Carles
        for j in range(i+1,num_atoms):
641 8ab593ee Carles
            pair_pop = abs(tri[j,i])   
642 8ab593ee Carles
            if pair_pop >= 0.15: #arbitry cutoff for bond. May need adjustment
643 8ab593ee Carles
                AC[i,j] = 1
644 8ab593ee Carles
                AC[j,i] = 1
645 8ab593ee Carles
646 8ab593ee Carles
    return AC, mol
647 8ab593ee Carles
648 8ab593ee Carles
649 8ab593ee Carles
def chiral_stereo_check(mol):
650 8ab593ee Carles
    """
651 8ab593ee Carles
    Find and embed chiral information into the model based on the coordinates
652 8ab593ee Carles

653 8ab593ee Carles
    args:
654 8ab593ee Carles
        mol - rdkit molecule, with embeded conformer
655 8ab593ee Carles

656 8ab593ee Carles
    """
657 8ab593ee Carles
    Chem.SanitizeMol(mol)
658 8ab593ee Carles
    Chem.DetectBondStereochemistry(mol, -1)
659 8ab593ee Carles
    Chem.AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True)
660 8ab593ee Carles
    Chem.AssignAtomChiralTagsFromStructure(mol, -1)
661 8ab593ee Carles
662 8ab593ee Carles
    return
663 8ab593ee Carles
664 8ab593ee Carles
665 8ab593ee Carles
def xyz2mol(atoms, coordinates,
666 8ab593ee Carles
    charge=0,
667 8ab593ee Carles
    allow_charged_fragments=True,
668 8ab593ee Carles
    use_graph=True,
669 8ab593ee Carles
    use_huckel=False,
670 8ab593ee Carles
    embed_chiral=True):
671 8ab593ee Carles
    """
672 8ab593ee Carles
    Generate a rdkit molobj from atoms, coordinates and a total_charge.
673 8ab593ee Carles

674 8ab593ee Carles
    args:
675 8ab593ee Carles
        atoms - list of atom types (int)
676 8ab593ee Carles
        coordinates - 3xN Cartesian coordinates
677 8ab593ee Carles
        charge - total charge of the system (default: 0)
678 8ab593ee Carles

679 8ab593ee Carles
    optional:
680 8ab593ee Carles
        allow_charged_fragments - alternatively radicals are made
681 8ab593ee Carles
        use_graph - use graph (networkx)
682 8ab593ee Carles
        use_huckel - Use Huckel method for atom connectivity prediction
683 8ab593ee Carles
        embed_chiral - embed chiral information to the molecule
684 8ab593ee Carles

685 8ab593ee Carles
    returns:
686 8ab593ee Carles
        mol - rdkit molobj
687 8ab593ee Carles

688 8ab593ee Carles
    """
689 8ab593ee Carles
690 8ab593ee Carles
    # Get atom connectivity (AC) matrix, list of atomic numbers, molecular charge,
691 8ab593ee Carles
    # and mol object with no connectivity information
692 5bfea9b9 Carles
    # print("atoms variable: ", atoms)
693 8ab593ee Carles
    AC, mol = xyz2AC(atoms, coordinates, charge, use_huckel=use_huckel)
694 8ab593ee Carles
695 8ab593ee Carles
    # Convert AC to bond order matrix and add connectivity and charge info to
696 8ab593ee Carles
    # mol object
697 8ab593ee Carles
    new_mol = AC2mol(mol, AC, atoms, charge,
698 8ab593ee Carles
        allow_charged_fragments=allow_charged_fragments,
699 8ab593ee Carles
        use_graph=use_graph)
700 8ab593ee Carles
701 8ab593ee Carles
    # Check for stereocenters and chiral centers
702 8ab593ee Carles
    if embed_chiral:
703 8ab593ee Carles
        chiral_stereo_check(new_mol)
704 8ab593ee Carles
705 8ab593ee Carles
    return new_mol
706 8ab593ee Carles
707 8ab593ee Carles
708 8ab593ee Carles
def main():
709 8ab593ee Carles
710 8ab593ee Carles
711 8ab593ee Carles
    return
712 8ab593ee Carles
713 8ab593ee Carles
714 8ab593ee Carles
if __name__ == "__main__":
715 8ab593ee Carles
716 8ab593ee Carles
    import argparse
717 8ab593ee Carles
718 8ab593ee Carles
    parser = argparse.ArgumentParser() # usage='%(prog)s [options] molecule.xyz')
719 8ab593ee Carles
    parser.add_argument('structure', metavar='structure', type=str)
720 8ab593ee Carles
    parser.add_argument('-s', '--sdf',
721 8ab593ee Carles
        action="store_true",
722 8ab593ee Carles
        help="Dump sdf file")
723 8ab593ee Carles
    parser.add_argument('--ignore-chiral',
724 8ab593ee Carles
        action="store_true",
725 8ab593ee Carles
        help="Ignore chiral centers")
726 8ab593ee Carles
    parser.add_argument('--no-charged-fragments',
727 8ab593ee Carles
        action="store_true",
728 8ab593ee Carles
        help="Allow radicals to be made")
729 8ab593ee Carles
    parser.add_argument('--no-graph',
730 8ab593ee Carles
        action="store_true",
731 8ab593ee Carles
        help="Run xyz2mol without networkx dependencies")
732 8ab593ee Carles
733 8ab593ee Carles
    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
734 8ab593ee Carles
    # otherwise van der Waals radii are used
735 8ab593ee Carles
    parser.add_argument('--use-huckel',
736 8ab593ee Carles
        action="store_true",
737 8ab593ee Carles
        help="Use Huckel method for atom connectivity")
738 8ab593ee Carles
    parser.add_argument('-o', '--output-format',
739 8ab593ee Carles
        action="store",
740 8ab593ee Carles
        type=str,
741 8ab593ee Carles
        help="Output format [smiles,sdf] (default=sdf)")
742 8ab593ee Carles
    parser.add_argument('-c', '--charge',
743 8ab593ee Carles
        action="store",
744 8ab593ee Carles
        metavar="int",
745 8ab593ee Carles
        type=int,
746 8ab593ee Carles
        help="Total charge of the system")
747 8ab593ee Carles
748 8ab593ee Carles
    args = parser.parse_args()
749 8ab593ee Carles
750 8ab593ee Carles
    # read xyz file
751 8ab593ee Carles
    filename = args.structure
752 8ab593ee Carles
753 8ab593ee Carles
    # allow for charged fragments, alternatively radicals are made
754 8ab593ee Carles
    charged_fragments = not args.no_charged_fragments
755 8ab593ee Carles
756 8ab593ee Carles
    # quick is faster for large systems but requires networkx
757 8ab593ee Carles
    # if you don't want to install networkx set quick=False and
758 8ab593ee Carles
    # uncomment 'import networkx as nx' at the top of the file
759 8ab593ee Carles
    quick = not args.no_graph
760 8ab593ee Carles
761 8ab593ee Carles
    # chiral comment
762 8ab593ee Carles
    embed_chiral = not args.ignore_chiral
763 8ab593ee Carles
764 8ab593ee Carles
    # read atoms and coordinates. Try to find the charge
765 8ab593ee Carles
    atoms, charge, xyz_coordinates = read_xyz_file(filename)
766 8ab593ee Carles
767 8ab593ee Carles
    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
768 8ab593ee Carles
    # otherwise van der Waals radii are used
769 8ab593ee Carles
    use_huckel = args.use_huckel
770 8ab593ee Carles
771 8ab593ee Carles
    # if explicit charge from args, set it
772 8ab593ee Carles
    if args.charge is not None:
773 8ab593ee Carles
        charge = int(args.charge)
774 8ab593ee Carles
775 8ab593ee Carles
    # Get the molobj
776 8ab593ee Carles
    mol = xyz2mol(atoms, xyz_coordinates,
777 8ab593ee Carles
        charge=charge,
778 8ab593ee Carles
        use_graph=quick,
779 8ab593ee Carles
        allow_charged_fragments=charged_fragments,
780 8ab593ee Carles
        embed_chiral=embed_chiral,
781 8ab593ee Carles
        use_huckel=use_huckel)
782 8ab593ee Carles
783 8ab593ee Carles
    # Print output
784 8ab593ee Carles
    if args.output_format == "sdf":
785 8ab593ee Carles
        txt = Chem.MolToMolBlock(mol)
786 8ab593ee Carles
        print(txt)
787 8ab593ee Carles
788 8ab593ee Carles
    else:
789 8ab593ee Carles
        # Canonical hack
790 8ab593ee Carles
        isomeric_smiles = not args.ignore_chiral
791 8ab593ee Carles
        smiles = Chem.MolToSmiles(mol, isomericSmiles=isomeric_smiles)
792 8ab593ee Carles
        m = Chem.MolFromSmiles(smiles)
793 8ab593ee Carles
        smiles = Chem.MolToSmiles(m, isomericSmiles=isomeric_smiles)
794 8ab593ee Carles
        print(smiles)