dockonsurf / modules / xyz2mol.py @ 78fcb188
Historique | Voir | Annoter | Télécharger (21,01 ko)
1 | 8ab593ee | Carles | """
|
---|---|---|---|
2 | 8ab593ee | Carles | Module for generating rdkit molobj/smiles/molecular graph from free atoms
|
3 | 8ab593ee | Carles |
|
4 | 8ab593ee | Carles | Implementation by Jan H. Jensen, based on the paper
|
5 | 8ab593ee | Carles |
|
6 | 8ab593ee | Carles | Yeonjoon Kim and Woo Youn Kim
|
7 | 8ab593ee | Carles | "Universal Structure Conversion Method for Organic Molecules: From Atomic Connectivity
|
8 | 8ab593ee | Carles | to Three-Dimensional Geometry"
|
9 | 8ab593ee | Carles | Bull. Korean Chem. Soc. 2015, Vol. 36, 1769-1777
|
10 | 8ab593ee | Carles | DOI: 10.1002/bkcs.10334
|
11 | 8ab593ee | Carles |
|
12 | 8ab593ee | Carles | """
|
13 | 8ab593ee | Carles | |
14 | 8ab593ee | Carles | import copy |
15 | 8ab593ee | Carles | import itertools |
16 | 8ab593ee | Carles | |
17 | 8ab593ee | Carles | from rdkit.Chem import rdmolops |
18 | 8ab593ee | Carles | try:
|
19 | 8ab593ee | Carles | from rdkit.Chem import rdEHTTools #requires RDKit 2019.9.1 or later |
20 | 8ab593ee | Carles | except ImportError: |
21 | 8ab593ee | Carles | rdEHTTools = None
|
22 | 8ab593ee | Carles | |
23 | 8ab593ee | Carles | from collections import defaultdict |
24 | 8ab593ee | Carles | |
25 | 8ab593ee | Carles | import numpy as np |
26 | 8ab593ee | Carles | import networkx as nx |
27 | 8ab593ee | Carles | |
28 | 8ab593ee | Carles | from rdkit import Chem |
29 | 8ab593ee | Carles | from rdkit.Chem import AllChem, rdmolops |
30 | 8ab593ee | Carles | |
31 | 8ab593ee | Carles | global __ATOM_LIST__
|
32 | 8ab593ee | Carles | __ATOM_LIST__ = \ |
33 | 8ab593ee | Carles | ['h', 'he', |
34 | 8ab593ee | Carles | 'li', 'be', 'b', 'c', 'n', 'o', 'f', 'ne', |
35 | 8ab593ee | Carles | 'na', 'mg', 'al', 'si', 'p', 's', 'cl', 'ar', |
36 | 8ab593ee | Carles | 'k', 'ca', 'sc', 'ti', 'v ', 'cr', 'mn', 'fe', 'co', 'ni', 'cu', |
37 | 8ab593ee | Carles | 'zn', 'ga', 'ge', 'as', 'se', 'br', 'kr', |
38 | 8ab593ee | Carles | 'rb', 'sr', 'y', 'zr', 'nb', 'mo', 'tc', 'ru', 'rh', 'pd', 'ag', |
39 | 8ab593ee | Carles | 'cd', 'in', 'sn', 'sb', 'te', 'i', 'xe', |
40 | 8ab593ee | Carles | 'cs', 'ba', 'la', 'ce', 'pr', 'nd', 'pm', 'sm', 'eu', 'gd', 'tb', 'dy', |
41 | 8ab593ee | Carles | 'ho', 'er', 'tm', 'yb', 'lu', 'hf', 'ta', 'w', 're', 'os', 'ir', 'pt', |
42 | 8ab593ee | Carles | 'au', 'hg', 'tl', 'pb', 'bi', 'po', 'at', 'rn', |
43 | 8ab593ee | Carles | 'fr', 'ra', 'ac', 'th', 'pa', 'u', 'np', 'pu'] |
44 | 8ab593ee | Carles | |
45 | 8ab593ee | Carles | |
46 | 8ab593ee | Carles | global atomic_valence
|
47 | 8ab593ee | Carles | global atomic_valence_electrons
|
48 | 8ab593ee | Carles | |
49 | 8ab593ee | Carles | atomic_valence = defaultdict(list)
|
50 | 8ab593ee | Carles | atomic_valence[1] = [1] |
51 | 8ab593ee | Carles | atomic_valence[6] = [4] |
52 | 8ab593ee | Carles | atomic_valence[7] = [3,4] |
53 | 8ab593ee | Carles | atomic_valence[8] = [2,1] |
54 | 8ab593ee | Carles | atomic_valence[9] = [1] |
55 | 8ab593ee | Carles | atomic_valence[14] = [4] |
56 | 8ab593ee | Carles | atomic_valence[15] = [5,3] #[5,4,3] |
57 | 8ab593ee | Carles | atomic_valence[16] = [6,3,2] #[6,4,2] |
58 | 8ab593ee | Carles | atomic_valence[17] = [1] |
59 | 8ab593ee | Carles | atomic_valence[32] = [4] |
60 | 8ab593ee | Carles | atomic_valence[35] = [1] |
61 | 8ab593ee | Carles | atomic_valence[53] = [1] |
62 | 8ab593ee | Carles | |
63 | 8ab593ee | Carles | atomic_valence_electrons = {} |
64 | 8ab593ee | Carles | atomic_valence_electrons[1] = 1 |
65 | 8ab593ee | Carles | atomic_valence_electrons[6] = 4 |
66 | 8ab593ee | Carles | atomic_valence_electrons[7] = 5 |
67 | 8ab593ee | Carles | atomic_valence_electrons[8] = 6 |
68 | 8ab593ee | Carles | atomic_valence_electrons[9] = 7 |
69 | 8ab593ee | Carles | atomic_valence_electrons[14] = 4 |
70 | 8ab593ee | Carles | atomic_valence_electrons[15] = 5 |
71 | 8ab593ee | Carles | atomic_valence_electrons[16] = 6 |
72 | 8ab593ee | Carles | atomic_valence_electrons[17] = 7 |
73 | 8ab593ee | Carles | atomic_valence_electrons[32] = 4 |
74 | 8ab593ee | Carles | atomic_valence_electrons[35] = 7 |
75 | 8ab593ee | Carles | atomic_valence_electrons[53] = 7 |
76 | 8ab593ee | Carles | |
77 | 8ab593ee | Carles | |
78 | 8ab593ee | Carles | def str_atom(atom): |
79 | 8ab593ee | Carles | """
|
80 | 8ab593ee | Carles | convert integer atom to string atom
|
81 | 8ab593ee | Carles | """
|
82 | 8ab593ee | Carles | global __ATOM_LIST__
|
83 | 8ab593ee | Carles | atom = __ATOM_LIST__[atom - 1]
|
84 | 8ab593ee | Carles | return atom
|
85 | 8ab593ee | Carles | |
86 | 8ab593ee | Carles | |
87 | 8ab593ee | Carles | def int_atom(atom): |
88 | 8ab593ee | Carles | """
|
89 | 8ab593ee | Carles | convert str atom to integer atom
|
90 | 8ab593ee | Carles | """
|
91 | 8ab593ee | Carles | global __ATOM_LIST__
|
92 | b5163003 | Carles | # print(atom)
|
93 | 8ab593ee | Carles | atom = atom.lower() |
94 | 8ab593ee | Carles | return __ATOM_LIST__.index(atom) + 1 |
95 | 8ab593ee | Carles | |
96 | 8ab593ee | Carles | |
97 | 8ab593ee | Carles | def get_UA(maxValence_list, valence_list): |
98 | 8ab593ee | Carles | """
|
99 | 8ab593ee | Carles | """
|
100 | 8ab593ee | Carles | UA = [] |
101 | 8ab593ee | Carles | DU = [] |
102 | 8ab593ee | Carles | for i, (maxValence, valence) in enumerate(zip(maxValence_list, valence_list)): |
103 | 8ab593ee | Carles | if not maxValence - valence > 0: |
104 | 8ab593ee | Carles | continue
|
105 | 8ab593ee | Carles | UA.append(i) |
106 | 8ab593ee | Carles | DU.append(maxValence - valence) |
107 | 8ab593ee | Carles | return UA, DU
|
108 | 8ab593ee | Carles | |
109 | 8ab593ee | Carles | |
110 | 8ab593ee | Carles | def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True): |
111 | 8ab593ee | Carles | """
|
112 | 8ab593ee | Carles | """
|
113 | 8ab593ee | Carles | BO = AC.copy() |
114 | 8ab593ee | Carles | DU_save = [] |
115 | 8ab593ee | Carles | |
116 | 8ab593ee | Carles | while DU_save != DU:
|
117 | 8ab593ee | Carles | for i, j in UA_pairs: |
118 | 8ab593ee | Carles | BO[i, j] += 1
|
119 | 8ab593ee | Carles | BO[j, i] += 1
|
120 | 8ab593ee | Carles | |
121 | 8ab593ee | Carles | BO_valence = list(BO.sum(axis=1)) |
122 | 8ab593ee | Carles | DU_save = copy.copy(DU) |
123 | 8ab593ee | Carles | UA, DU = get_UA(valences, BO_valence) |
124 | 8ab593ee | Carles | UA_pairs = get_UA_pairs(UA, AC, use_graph=use_graph)[0]
|
125 | 8ab593ee | Carles | |
126 | 8ab593ee | Carles | return BO
|
127 | 8ab593ee | Carles | |
128 | 8ab593ee | Carles | |
129 | 8ab593ee | Carles | def valences_not_too_large(BO, valences): |
130 | 8ab593ee | Carles | """
|
131 | 8ab593ee | Carles | """
|
132 | 8ab593ee | Carles | number_of_bonds_list = BO.sum(axis=1)
|
133 | 8ab593ee | Carles | for valence, number_of_bonds in zip(valences, number_of_bonds_list): |
134 | 8ab593ee | Carles | if number_of_bonds > valence:
|
135 | 8ab593ee | Carles | return False |
136 | 8ab593ee | Carles | |
137 | 8ab593ee | Carles | return True |
138 | 8ab593ee | Carles | |
139 | 8ab593ee | Carles | |
140 | 8ab593ee | Carles | def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valances, |
141 | 8ab593ee | Carles | allow_charged_fragments=True):
|
142 | 8ab593ee | Carles | """
|
143 | 8ab593ee | Carles | Sanity of bond-orders
|
144 | 8ab593ee | Carles |
|
145 | 8ab593ee | Carles | args:
|
146 | 8ab593ee | Carles | BO -
|
147 | 8ab593ee | Carles | AC -
|
148 | 8ab593ee | Carles | charge -
|
149 | 8ab593ee | Carles | DU -
|
150 | 8ab593ee | Carles |
|
151 | 8ab593ee | Carles |
|
152 | 8ab593ee | Carles | optional
|
153 | 8ab593ee | Carles | allow_charges_fragments -
|
154 | 8ab593ee | Carles |
|
155 | 8ab593ee | Carles |
|
156 | 8ab593ee | Carles | returns:
|
157 | 8ab593ee | Carles | boolean - true of molecule is OK, false if not
|
158 | 8ab593ee | Carles | """
|
159 | 8ab593ee | Carles | |
160 | 8ab593ee | Carles | if not valences_not_too_large(BO, valances): |
161 | 8ab593ee | Carles | return False |
162 | 8ab593ee | Carles | |
163 | 8ab593ee | Carles | # total charge
|
164 | 8ab593ee | Carles | Q = 0
|
165 | 8ab593ee | Carles | |
166 | 8ab593ee | Carles | # charge fragment list
|
167 | 8ab593ee | Carles | q_list = [] |
168 | 8ab593ee | Carles | |
169 | 8ab593ee | Carles | if allow_charged_fragments:
|
170 | 8ab593ee | Carles | |
171 | 8ab593ee | Carles | BO_valences = list(BO.sum(axis=1)) |
172 | 8ab593ee | Carles | for i, atom in enumerate(atoms): |
173 | 8ab593ee | Carles | q = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i]) |
174 | 8ab593ee | Carles | Q += q |
175 | 8ab593ee | Carles | if atom == 6: |
176 | 8ab593ee | Carles | number_of_single_bonds_to_C = list(BO[i, :]).count(1) |
177 | 8ab593ee | Carles | if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2: |
178 | 8ab593ee | Carles | Q += 1
|
179 | 8ab593ee | Carles | q = 2
|
180 | 8ab593ee | Carles | if number_of_single_bonds_to_C == 3 and Q + 1 < charge: |
181 | 8ab593ee | Carles | Q += 2
|
182 | 8ab593ee | Carles | q = 1
|
183 | 8ab593ee | Carles | |
184 | 8ab593ee | Carles | if q != 0: |
185 | 8ab593ee | Carles | q_list.append(q) |
186 | 8ab593ee | Carles | |
187 | 8ab593ee | Carles | check_sum = (BO - AC).sum() == sum(DU)
|
188 | 8ab593ee | Carles | check_charge = charge == Q |
189 | 8ab593ee | Carles | # check_len = len(q_list) <= abs(charge)
|
190 | 8ab593ee | Carles | |
191 | 8ab593ee | Carles | if check_sum and check_charge: |
192 | 8ab593ee | Carles | return True |
193 | 8ab593ee | Carles | |
194 | 8ab593ee | Carles | return False |
195 | 8ab593ee | Carles | |
196 | 8ab593ee | Carles | |
197 | 8ab593ee | Carles | def get_atomic_charge(atom, atomic_valence_electrons, BO_valence): |
198 | 8ab593ee | Carles | """
|
199 | 8ab593ee | Carles | """
|
200 | 8ab593ee | Carles | |
201 | 8ab593ee | Carles | if atom == 1: |
202 | 8ab593ee | Carles | charge = 1 - BO_valence
|
203 | 8ab593ee | Carles | elif atom == 5: |
204 | 8ab593ee | Carles | charge = 3 - BO_valence
|
205 | 8ab593ee | Carles | elif atom == 15 and BO_valence == 5: |
206 | 8ab593ee | Carles | charge = 0
|
207 | 8ab593ee | Carles | elif atom == 16 and BO_valence == 6: |
208 | 8ab593ee | Carles | charge = 0
|
209 | 8ab593ee | Carles | else:
|
210 | 8ab593ee | Carles | charge = atomic_valence_electrons - 8 + BO_valence
|
211 | 8ab593ee | Carles | |
212 | 8ab593ee | Carles | return charge
|
213 | 8ab593ee | Carles | |
214 | 8ab593ee | Carles | |
215 | 8ab593ee | Carles | def clean_charges(mol): |
216 | 8ab593ee | Carles | """
|
217 | 8ab593ee | Carles | This hack should not be needed anymore, but is kept just in case
|
218 | 8ab593ee | Carles |
|
219 | 8ab593ee | Carles | """
|
220 | 8ab593ee | Carles | |
221 | 8ab593ee | Carles | Chem.SanitizeMol(mol) |
222 | 8ab593ee | Carles | #rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
|
223 | 8ab593ee | Carles | # '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
|
224 | 8ab593ee | Carles | # '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
|
225 | 8ab593ee | Carles | # '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
|
226 | 8ab593ee | Carles | # '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
|
227 | 8ab593ee | Carles | # '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']
|
228 | 8ab593ee | Carles | |
229 | 8ab593ee | Carles | rxn_smarts = ['[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][CX3-,NX3-:5][#6,#7:6]1=[#6,#7:7]>>'
|
230 | 8ab593ee | Carles | '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][-0,-0:5]=[#6,#7:6]1[#6-,#7-:7]',
|
231 | 8ab593ee | Carles | '[#6,#7:1]1=[#6,#7:2][#6,#7:3](=[#6,#7:4])[#6,#7:5]=[#6,#7:6][CX3-,NX3-:7]1>>'
|
232 | 8ab593ee | Carles | '[#6,#7:1]1=[#6,#7:2][#6,#7:3]([#6-,#7-:4])=[#6,#7:5][#6,#7:6]=[-0,-0:7]1']
|
233 | 8ab593ee | Carles | |
234 | 8ab593ee | Carles | fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False) |
235 | 8ab593ee | Carles | |
236 | 8ab593ee | Carles | for i, fragment in enumerate(fragments): |
237 | 8ab593ee | Carles | for smarts in rxn_smarts: |
238 | 8ab593ee | Carles | patt = Chem.MolFromSmarts(smarts.split(">>")[0]) |
239 | 8ab593ee | Carles | while fragment.HasSubstructMatch(patt):
|
240 | 8ab593ee | Carles | rxn = AllChem.ReactionFromSmarts(smarts) |
241 | 8ab593ee | Carles | ps = rxn.RunReactants((fragment,)) |
242 | 8ab593ee | Carles | fragment = ps[0][0] |
243 | 8ab593ee | Carles | Chem.SanitizeMol(fragment) |
244 | 8ab593ee | Carles | if i == 0: |
245 | 8ab593ee | Carles | mol = fragment |
246 | 8ab593ee | Carles | else:
|
247 | 8ab593ee | Carles | mol = Chem.CombineMols(mol, fragment) |
248 | 8ab593ee | Carles | |
249 | 8ab593ee | Carles | return mol
|
250 | 8ab593ee | Carles | |
251 | 8ab593ee | Carles | |
252 | 8ab593ee | Carles | def BO2mol(mol, BO_matrix, atoms, atomic_valence_electrons, |
253 | 8ab593ee | Carles | mol_charge, allow_charged_fragments=True):
|
254 | 8ab593ee | Carles | """
|
255 | 8ab593ee | Carles | based on code written by Paolo Toscani
|
256 | 8ab593ee | Carles |
|
257 | 8ab593ee | Carles | From bond order, atoms, valence structure and total charge, generate an
|
258 | 8ab593ee | Carles | rdkit molecule.
|
259 | 8ab593ee | Carles |
|
260 | 8ab593ee | Carles | args:
|
261 | 8ab593ee | Carles | mol - rdkit molecule
|
262 | 8ab593ee | Carles | BO_matrix - bond order matrix of molecule
|
263 | 8ab593ee | Carles | atoms - list of integer atomic symbols
|
264 | 8ab593ee | Carles | atomic_valence_electrons -
|
265 | 8ab593ee | Carles | mol_charge - total charge of molecule
|
266 | 8ab593ee | Carles |
|
267 | 8ab593ee | Carles | optional:
|
268 | 8ab593ee | Carles | allow_charged_fragments - bool - allow charged fragments
|
269 | 8ab593ee | Carles |
|
270 | 8ab593ee | Carles | returns
|
271 | 8ab593ee | Carles | mol - updated rdkit molecule with bond connectivity
|
272 | 8ab593ee | Carles |
|
273 | 8ab593ee | Carles | """
|
274 | 8ab593ee | Carles | |
275 | 8ab593ee | Carles | l = len(BO_matrix)
|
276 | 8ab593ee | Carles | l2 = len(atoms)
|
277 | 8ab593ee | Carles | BO_valences = list(BO_matrix.sum(axis=1)) |
278 | 8ab593ee | Carles | |
279 | 8ab593ee | Carles | if (l != l2):
|
280 | 8ab593ee | Carles | raise RuntimeError('sizes of adjMat ({0:d}) and Atoms {1:d} differ'.format(l, l2)) |
281 | 8ab593ee | Carles | |
282 | 8ab593ee | Carles | rwMol = Chem.RWMol(mol) |
283 | 8ab593ee | Carles | |
284 | 8ab593ee | Carles | bondTypeDict = { |
285 | 8ab593ee | Carles | 1: Chem.BondType.SINGLE,
|
286 | 8ab593ee | Carles | 2: Chem.BondType.DOUBLE,
|
287 | 8ab593ee | Carles | 3: Chem.BondType.TRIPLE
|
288 | 8ab593ee | Carles | } |
289 | 8ab593ee | Carles | |
290 | 8ab593ee | Carles | for i in range(l): |
291 | 8ab593ee | Carles | for j in range(i + 1, l): |
292 | 8ab593ee | Carles | bo = int(round(BO_matrix[i, j])) |
293 | 8ab593ee | Carles | if (bo == 0): |
294 | 8ab593ee | Carles | continue
|
295 | 8ab593ee | Carles | bt = bondTypeDict.get(bo, Chem.BondType.SINGLE) |
296 | 8ab593ee | Carles | rwMol.AddBond(i, j, bt) |
297 | 8ab593ee | Carles | |
298 | 8ab593ee | Carles | mol = rwMol.GetMol() |
299 | 8ab593ee | Carles | |
300 | 8ab593ee | Carles | if allow_charged_fragments:
|
301 | 8ab593ee | Carles | mol = set_atomic_charges( |
302 | 8ab593ee | Carles | mol, |
303 | 8ab593ee | Carles | atoms, |
304 | 8ab593ee | Carles | atomic_valence_electrons, |
305 | 8ab593ee | Carles | BO_valences, |
306 | 8ab593ee | Carles | BO_matrix, |
307 | 8ab593ee | Carles | mol_charge) |
308 | 8ab593ee | Carles | else:
|
309 | 8ab593ee | Carles | mol = set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences) |
310 | 8ab593ee | Carles | |
311 | 8ab593ee | Carles | return mol
|
312 | 8ab593ee | Carles | |
313 | 8ab593ee | Carles | |
314 | 8ab593ee | Carles | def set_atomic_charges(mol, atoms, atomic_valence_electrons, |
315 | 8ab593ee | Carles | BO_valences, BO_matrix, mol_charge): |
316 | 8ab593ee | Carles | """
|
317 | 8ab593ee | Carles | """
|
318 | 8ab593ee | Carles | q = 0
|
319 | 8ab593ee | Carles | for i, atom in enumerate(atoms): |
320 | 8ab593ee | Carles | a = mol.GetAtomWithIdx(i) |
321 | 8ab593ee | Carles | charge = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i]) |
322 | 8ab593ee | Carles | q += charge |
323 | 8ab593ee | Carles | if atom == 6: |
324 | 8ab593ee | Carles | number_of_single_bonds_to_C = list(BO_matrix[i, :]).count(1) |
325 | 8ab593ee | Carles | if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2: |
326 | 8ab593ee | Carles | q += 1
|
327 | 8ab593ee | Carles | charge = 0
|
328 | 8ab593ee | Carles | if number_of_single_bonds_to_C == 3 and q + 1 < mol_charge: |
329 | 8ab593ee | Carles | q += 2
|
330 | 8ab593ee | Carles | charge = 1
|
331 | 8ab593ee | Carles | |
332 | 8ab593ee | Carles | if (abs(charge) > 0): |
333 | 8ab593ee | Carles | a.SetFormalCharge(int(charge))
|
334 | 8ab593ee | Carles | |
335 | 8ab593ee | Carles | mol = clean_charges(mol) |
336 | 8ab593ee | Carles | |
337 | 8ab593ee | Carles | return mol
|
338 | 8ab593ee | Carles | |
339 | 8ab593ee | Carles | |
340 | 8ab593ee | Carles | def set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences): |
341 | 8ab593ee | Carles | """
|
342 | 8ab593ee | Carles |
|
343 | 8ab593ee | Carles | The number of radical electrons = absolute atomic charge
|
344 | 8ab593ee | Carles |
|
345 | 8ab593ee | Carles | """
|
346 | 8ab593ee | Carles | for i, atom in enumerate(atoms): |
347 | 8ab593ee | Carles | a = mol.GetAtomWithIdx(i) |
348 | 8ab593ee | Carles | charge = get_atomic_charge( |
349 | 8ab593ee | Carles | atom, |
350 | 8ab593ee | Carles | atomic_valence_electrons[atom], |
351 | 8ab593ee | Carles | BO_valences[i]) |
352 | 8ab593ee | Carles | |
353 | 8ab593ee | Carles | if (abs(charge) > 0): |
354 | 8ab593ee | Carles | a.SetNumRadicalElectrons(abs(int(charge))) |
355 | 8ab593ee | Carles | |
356 | 8ab593ee | Carles | return mol
|
357 | 8ab593ee | Carles | |
358 | 8ab593ee | Carles | |
359 | 8ab593ee | Carles | def get_bonds(UA, AC): |
360 | 8ab593ee | Carles | """
|
361 | 8ab593ee | Carles |
|
362 | 8ab593ee | Carles | """
|
363 | 8ab593ee | Carles | bonds = [] |
364 | 8ab593ee | Carles | |
365 | 8ab593ee | Carles | for k, i in enumerate(UA): |
366 | 8ab593ee | Carles | for j in UA[k + 1:]: |
367 | 8ab593ee | Carles | if AC[i, j] == 1: |
368 | 8ab593ee | Carles | bonds.append(tuple(sorted([i, j]))) |
369 | 8ab593ee | Carles | |
370 | 8ab593ee | Carles | return bonds
|
371 | 8ab593ee | Carles | |
372 | 8ab593ee | Carles | |
373 | 8ab593ee | Carles | def get_UA_pairs(UA, AC, use_graph=True): |
374 | 8ab593ee | Carles | """
|
375 | 8ab593ee | Carles |
|
376 | 8ab593ee | Carles | """
|
377 | 8ab593ee | Carles | |
378 | 8ab593ee | Carles | bonds = get_bonds(UA, AC) |
379 | 8ab593ee | Carles | |
380 | 8ab593ee | Carles | if len(bonds) == 0: |
381 | 8ab593ee | Carles | return [()]
|
382 | 8ab593ee | Carles | |
383 | 8ab593ee | Carles | if use_graph:
|
384 | 8ab593ee | Carles | G = nx.Graph() |
385 | 8ab593ee | Carles | G.add_edges_from(bonds) |
386 | 8ab593ee | Carles | UA_pairs = [list(nx.max_weight_matching(G))]
|
387 | 8ab593ee | Carles | return UA_pairs
|
388 | 8ab593ee | Carles | |
389 | 8ab593ee | Carles | max_atoms_in_combo = 0
|
390 | 8ab593ee | Carles | UA_pairs = [()] |
391 | 8ab593ee | Carles | for combo in list(itertools.combinations(bonds, int(len(UA) / 2))): |
392 | 8ab593ee | Carles | flat_list = [item for sublist in combo for item in sublist] |
393 | 8ab593ee | Carles | atoms_in_combo = len(set(flat_list)) |
394 | 8ab593ee | Carles | if atoms_in_combo > max_atoms_in_combo:
|
395 | 8ab593ee | Carles | max_atoms_in_combo = atoms_in_combo |
396 | 8ab593ee | Carles | UA_pairs = [combo] |
397 | 8ab593ee | Carles | |
398 | 8ab593ee | Carles | elif atoms_in_combo == max_atoms_in_combo:
|
399 | 8ab593ee | Carles | UA_pairs.append(combo) |
400 | 8ab593ee | Carles | |
401 | 8ab593ee | Carles | return UA_pairs
|
402 | 8ab593ee | Carles | |
403 | 8ab593ee | Carles | |
404 | 8ab593ee | Carles | def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True): |
405 | 8ab593ee | Carles | """
|
406 | 8ab593ee | Carles |
|
407 | 8ab593ee | Carles | implemenation of algorithm shown in Figure 2
|
408 | 8ab593ee | Carles |
|
409 | 8ab593ee | Carles | UA: unsaturated atoms
|
410 | 8ab593ee | Carles |
|
411 | 8ab593ee | Carles | DU: degree of unsaturation (u matrix in Figure)
|
412 | 8ab593ee | Carles |
|
413 | 8ab593ee | Carles | best_BO: Bcurr in Figure
|
414 | 8ab593ee | Carles |
|
415 | 8ab593ee | Carles | """
|
416 | 8ab593ee | Carles | |
417 | 8ab593ee | Carles | global atomic_valence
|
418 | 8ab593ee | Carles | global atomic_valence_electrons
|
419 | 8ab593ee | Carles | |
420 | 8ab593ee | Carles | # make a list of valences, e.g. for CO: [[4],[2,1]]
|
421 | 8ab593ee | Carles | valences_list_of_lists = [] |
422 | 8ab593ee | Carles | AC_valence = list(AC.sum(axis=1)) |
423 | 8ab593ee | Carles | for atomicNum in atoms: |
424 | 8ab593ee | Carles | valences_list_of_lists.append(atomic_valence[atomicNum]) |
425 | 8ab593ee | Carles | |
426 | 8ab593ee | Carles | # convert [[4],[2,1]] to [[4,2],[4,1]]
|
427 | 8ab593ee | Carles | valences_list = itertools.product(*valences_list_of_lists) |
428 | 8ab593ee | Carles | |
429 | 8ab593ee | Carles | best_BO = AC.copy() |
430 | 8ab593ee | Carles | |
431 | 8ab593ee | Carles | for valences in valences_list: |
432 | 8ab593ee | Carles | |
433 | 8ab593ee | Carles | UA, DU_from_AC = get_UA(valences, AC_valence) |
434 | 8ab593ee | Carles | |
435 | 8ab593ee | Carles | check_len = (len(UA) == 0) |
436 | 8ab593ee | Carles | if check_len:
|
437 | 8ab593ee | Carles | check_bo = BO_is_OK(AC, AC, charge, DU_from_AC, |
438 | 8ab593ee | Carles | atomic_valence_electrons, atoms, valences, |
439 | 8ab593ee | Carles | allow_charged_fragments=allow_charged_fragments) |
440 | 8ab593ee | Carles | else:
|
441 | 8ab593ee | Carles | check_bo = None
|
442 | 8ab593ee | Carles | |
443 | 8ab593ee | Carles | if check_len and check_bo: |
444 | 8ab593ee | Carles | return AC, atomic_valence_electrons
|
445 | 8ab593ee | Carles | |
446 | 8ab593ee | Carles | UA_pairs_list = get_UA_pairs(UA, AC, use_graph=use_graph) |
447 | 8ab593ee | Carles | for UA_pairs in UA_pairs_list: |
448 | 8ab593ee | Carles | BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph) |
449 | 8ab593ee | Carles | status = BO_is_OK(BO, AC, charge, DU_from_AC, |
450 | 8ab593ee | Carles | atomic_valence_electrons, atoms, valences, |
451 | 8ab593ee | Carles | allow_charged_fragments=allow_charged_fragments) |
452 | 8ab593ee | Carles | |
453 | 8ab593ee | Carles | if status:
|
454 | 8ab593ee | Carles | return BO, atomic_valence_electrons
|
455 | 8ab593ee | Carles | |
456 | 8ab593ee | Carles | elif BO.sum() >= best_BO.sum() and valences_not_too_large(BO, valences): |
457 | 8ab593ee | Carles | best_BO = BO.copy() |
458 | 8ab593ee | Carles | |
459 | 8ab593ee | Carles | return best_BO, atomic_valence_electrons
|
460 | 8ab593ee | Carles | |
461 | 8ab593ee | Carles | |
462 | 8ab593ee | Carles | def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, use_graph=True): |
463 | 8ab593ee | Carles | """
|
464 | 8ab593ee | Carles | """
|
465 | 8ab593ee | Carles | |
466 | 8ab593ee | Carles | # convert AC matrix to bond order (BO) matrix
|
467 | 8ab593ee | Carles | BO, atomic_valence_electrons = AC2BO( |
468 | 8ab593ee | Carles | AC, |
469 | 8ab593ee | Carles | atoms, |
470 | 8ab593ee | Carles | charge, |
471 | 8ab593ee | Carles | allow_charged_fragments=allow_charged_fragments, |
472 | 8ab593ee | Carles | use_graph=use_graph) |
473 | 8ab593ee | Carles | |
474 | 8ab593ee | Carles | # add BO connectivity and charge info to mol object
|
475 | 8ab593ee | Carles | mol = BO2mol( |
476 | 8ab593ee | Carles | mol, |
477 | 8ab593ee | Carles | BO, |
478 | 8ab593ee | Carles | atoms, |
479 | 8ab593ee | Carles | atomic_valence_electrons, |
480 | 8ab593ee | Carles | charge, |
481 | 8ab593ee | Carles | allow_charged_fragments=allow_charged_fragments) |
482 | 8ab593ee | Carles | |
483 | 8ab593ee | Carles | return mol
|
484 | 8ab593ee | Carles | |
485 | 8ab593ee | Carles | |
486 | 8ab593ee | Carles | def get_proto_mol(atoms): |
487 | 8ab593ee | Carles | """
|
488 | 8ab593ee | Carles | """
|
489 | 8ab593ee | Carles | mol = Chem.MolFromSmarts("[#" + str(atoms[0]) + "]") |
490 | 8ab593ee | Carles | rwMol = Chem.RWMol(mol) |
491 | 8ab593ee | Carles | for i in range(1, len(atoms)): |
492 | 8ab593ee | Carles | a = Chem.Atom(atoms[i]) |
493 | 8ab593ee | Carles | rwMol.AddAtom(a) |
494 | 8ab593ee | Carles | |
495 | 8ab593ee | Carles | mol = rwMol.GetMol() |
496 | 8ab593ee | Carles | |
497 | 8ab593ee | Carles | return mol
|
498 | 8ab593ee | Carles | |
499 | 8ab593ee | Carles | |
500 | 8ab593ee | Carles | def read_xyz_file(filename, look_for_charge=True): |
501 | 8ab593ee | Carles | """
|
502 | 8ab593ee | Carles | """
|
503 | 8ab593ee | Carles | |
504 | 8ab593ee | Carles | atomic_symbols = [] |
505 | 8ab593ee | Carles | xyz_coordinates = [] |
506 | 8ab593ee | Carles | charge = 0
|
507 | 8ab593ee | Carles | title = ""
|
508 | 8ab593ee | Carles | |
509 | 8ab593ee | Carles | with open(filename, "r") as file: |
510 | 8ab593ee | Carles | for line_number, line in enumerate(file): |
511 | 8ab593ee | Carles | if line_number == 0: |
512 | 8ab593ee | Carles | num_atoms = int(line)
|
513 | 8ab593ee | Carles | elif line_number == 1: |
514 | 8ab593ee | Carles | title = line |
515 | 8ab593ee | Carles | if "charge=" in line: #TODO: More flexible charge declaration |
516 | 8ab593ee | Carles | charge = int(line.split("=")[1]) |
517 | 8ab593ee | Carles | else:
|
518 | 8ab593ee | Carles | atomic_symbol, x, y, z = line.split() |
519 | 8ab593ee | Carles | atomic_symbols.append(atomic_symbol) |
520 | 8ab593ee | Carles | xyz_coordinates.append([float(x), float(y), float(z)]) |
521 | 8ab593ee | Carles | |
522 | 8ab593ee | Carles | atoms = [int_atom(atom) for atom in atomic_symbols] |
523 | 8ab593ee | Carles | |
524 | 8ab593ee | Carles | return atoms, charge, xyz_coordinates
|
525 | 8ab593ee | Carles | |
526 | 8ab593ee | Carles | |
527 | 8ab593ee | Carles | def xyz2AC(atoms, xyz, charge, use_huckel=False): |
528 | 8ab593ee | Carles | """
|
529 | 8ab593ee | Carles |
|
530 | 8ab593ee | Carles | atoms and coordinates to atom connectivity (AC)
|
531 | 8ab593ee | Carles |
|
532 | 8ab593ee | Carles | args:
|
533 | 8ab593ee | Carles | atoms - int atom types
|
534 | 8ab593ee | Carles | xyz - coordinates
|
535 | 8ab593ee | Carles | charge - molecule charge
|
536 | 8ab593ee | Carles |
|
537 | 8ab593ee | Carles | optional:
|
538 | 8ab593ee | Carles | use_huckel - Use Huckel method for atom connecitivty
|
539 | 8ab593ee | Carles |
|
540 | 8ab593ee | Carles | returns
|
541 | 8ab593ee | Carles | ac - atom connectivity matrix
|
542 | 8ab593ee | Carles | mol - rdkit molecule
|
543 | 8ab593ee | Carles |
|
544 | 8ab593ee | Carles | """
|
545 | 8ab593ee | Carles | |
546 | 8ab593ee | Carles | if use_huckel:
|
547 | 8ab593ee | Carles | return xyz2AC_huckel(atoms, xyz, charge)
|
548 | 8ab593ee | Carles | else:
|
549 | 8ab593ee | Carles | return xyz2AC_vdW(atoms, xyz)
|
550 | 8ab593ee | Carles | |
551 | 8ab593ee | Carles | |
552 | 8ab593ee | Carles | def xyz2AC_vdW(atoms, xyz): |
553 | 8ab593ee | Carles | |
554 | 8ab593ee | Carles | # Get mol template
|
555 | 8ab593ee | Carles | mol = get_proto_mol(atoms) |
556 | 8ab593ee | Carles | |
557 | 8ab593ee | Carles | # Set coordinates
|
558 | 8ab593ee | Carles | conf = Chem.Conformer(mol.GetNumAtoms()) |
559 | 8ab593ee | Carles | for i in range(mol.GetNumAtoms()): |
560 | 8ab593ee | Carles | conf.SetAtomPosition(i, (xyz[i][0], xyz[i][1], xyz[i][2])) |
561 | 8ab593ee | Carles | mol.AddConformer(conf) |
562 | 8ab593ee | Carles | |
563 | 8ab593ee | Carles | AC = get_AC(mol) |
564 | 8ab593ee | Carles | |
565 | 8ab593ee | Carles | return AC, mol
|
566 | 8ab593ee | Carles | |
567 | 8ab593ee | Carles | |
568 | 8ab593ee | Carles | def get_AC(mol, covalent_factor=1.3): |
569 | 8ab593ee | Carles | """
|
570 | 8ab593ee | Carles |
|
571 | 8ab593ee | Carles | Generate adjacent matrix from atoms and coordinates.
|
572 | 8ab593ee | Carles |
|
573 | 8ab593ee | Carles | AC is a (num_atoms, num_atoms) matrix with 1 being covalent bond and 0 is not
|
574 | 8ab593ee | Carles |
|
575 | 8ab593ee | Carles |
|
576 | 8ab593ee | Carles | covalent_factor - 1.3 is an arbitrary factor
|
577 | 8ab593ee | Carles |
|
578 | 8ab593ee | Carles | args:
|
579 | 8ab593ee | Carles | mol - rdkit molobj with 3D conformer
|
580 | 8ab593ee | Carles |
|
581 | 8ab593ee | Carles | optional
|
582 | 8ab593ee | Carles | covalent_factor - increase covalent bond length threshold with facto
|
583 | 8ab593ee | Carles |
|
584 | 8ab593ee | Carles | returns:
|
585 | 8ab593ee | Carles | AC - adjacent matrix
|
586 | 8ab593ee | Carles |
|
587 | 8ab593ee | Carles | """
|
588 | 8ab593ee | Carles | |
589 | 8ab593ee | Carles | # Calculate distance matrix
|
590 | 8ab593ee | Carles | dMat = Chem.Get3DDistanceMatrix(mol) |
591 | 8ab593ee | Carles | |
592 | 8ab593ee | Carles | pt = Chem.GetPeriodicTable() |
593 | 8ab593ee | Carles | num_atoms = mol.GetNumAtoms() |
594 | 8ab593ee | Carles | AC = np.zeros((num_atoms, num_atoms), dtype=int)
|
595 | 8ab593ee | Carles | |
596 | 8ab593ee | Carles | for i in range(num_atoms): |
597 | 8ab593ee | Carles | a_i = mol.GetAtomWithIdx(i) |
598 | 8ab593ee | Carles | Rcov_i = pt.GetRcovalent(a_i.GetAtomicNum()) * covalent_factor |
599 | 8ab593ee | Carles | for j in range(i + 1, num_atoms): |
600 | 8ab593ee | Carles | a_j = mol.GetAtomWithIdx(j) |
601 | 8ab593ee | Carles | Rcov_j = pt.GetRcovalent(a_j.GetAtomicNum()) * covalent_factor |
602 | 8ab593ee | Carles | if dMat[i, j] <= Rcov_i + Rcov_j:
|
603 | 8ab593ee | Carles | AC[i, j] = 1
|
604 | 8ab593ee | Carles | AC[j, i] = 1
|
605 | 8ab593ee | Carles | |
606 | 8ab593ee | Carles | return AC
|
607 | 8ab593ee | Carles | |
608 | 8ab593ee | Carles | |
609 | 8ab593ee | Carles | def xyz2AC_huckel(atomicNumList,xyz,charge): |
610 | 8ab593ee | Carles | """
|
611 | 8ab593ee | Carles |
|
612 | 8ab593ee | Carles | args
|
613 | 8ab593ee | Carles | atomicNumList - atom type list
|
614 | 8ab593ee | Carles | xyz - coordinates
|
615 | 8ab593ee | Carles | charge - molecule charge
|
616 | 8ab593ee | Carles |
|
617 | 8ab593ee | Carles | returns
|
618 | 8ab593ee | Carles | ac - atom connectivity
|
619 | 8ab593ee | Carles | mol - rdkit molecule
|
620 | 8ab593ee | Carles |
|
621 | 8ab593ee | Carles | """
|
622 | 8ab593ee | Carles | mol = get_proto_mol(atomicNumList) |
623 | 8ab593ee | Carles | |
624 | 8ab593ee | Carles | conf = Chem.Conformer(mol.GetNumAtoms()) |
625 | 8ab593ee | Carles | for i in range(mol.GetNumAtoms()): |
626 | 8ab593ee | Carles | conf.SetAtomPosition(i,(xyz[i][0],xyz[i][1],xyz[i][2])) |
627 | 8ab593ee | Carles | mol.AddConformer(conf) |
628 | 8ab593ee | Carles | |
629 | 8ab593ee | Carles | num_atoms = len(atomicNumList)
|
630 | 8ab593ee | Carles | AC = np.zeros((num_atoms,num_atoms)).astype(int)
|
631 | 8ab593ee | Carles | |
632 | 8ab593ee | Carles | mol_huckel = Chem.Mol(mol) |
633 | 8ab593ee | Carles | mol_huckel.GetAtomWithIdx(0).SetFormalCharge(charge) #mol charge arbitrarily added to 1st atom |
634 | 8ab593ee | Carles | |
635 | 8ab593ee | Carles | passed,result = rdEHTTools.RunMol(mol_huckel) |
636 | 8ab593ee | Carles | opop = result.GetReducedOverlapPopulationMatrix() |
637 | 8ab593ee | Carles | tri = np.zeros((num_atoms, num_atoms)) |
638 | 8ab593ee | Carles | tri[np.tril(np.ones((num_atoms, num_atoms), dtype=bool))] = opop #lower triangular to square matrix |
639 | 8ab593ee | Carles | for i in range(num_atoms): |
640 | 8ab593ee | Carles | for j in range(i+1,num_atoms): |
641 | 8ab593ee | Carles | pair_pop = abs(tri[j,i])
|
642 | 8ab593ee | Carles | if pair_pop >= 0.15: #arbitry cutoff for bond. May need adjustment |
643 | 8ab593ee | Carles | AC[i,j] = 1
|
644 | 8ab593ee | Carles | AC[j,i] = 1
|
645 | 8ab593ee | Carles | |
646 | 8ab593ee | Carles | return AC, mol
|
647 | 8ab593ee | Carles | |
648 | 8ab593ee | Carles | |
649 | 8ab593ee | Carles | def chiral_stereo_check(mol): |
650 | 8ab593ee | Carles | """
|
651 | 8ab593ee | Carles | Find and embed chiral information into the model based on the coordinates
|
652 | 8ab593ee | Carles |
|
653 | 8ab593ee | Carles | args:
|
654 | 8ab593ee | Carles | mol - rdkit molecule, with embeded conformer
|
655 | 8ab593ee | Carles |
|
656 | 8ab593ee | Carles | """
|
657 | 8ab593ee | Carles | Chem.SanitizeMol(mol) |
658 | 8ab593ee | Carles | Chem.DetectBondStereochemistry(mol, -1)
|
659 | 8ab593ee | Carles | Chem.AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True) |
660 | 8ab593ee | Carles | Chem.AssignAtomChiralTagsFromStructure(mol, -1)
|
661 | 8ab593ee | Carles | |
662 | 8ab593ee | Carles | return
|
663 | 8ab593ee | Carles | |
664 | 8ab593ee | Carles | |
665 | 8ab593ee | Carles | def xyz2mol(atoms, coordinates, |
666 | 8ab593ee | Carles | charge=0,
|
667 | 8ab593ee | Carles | allow_charged_fragments=True,
|
668 | 8ab593ee | Carles | use_graph=True,
|
669 | 8ab593ee | Carles | use_huckel=False,
|
670 | 8ab593ee | Carles | embed_chiral=True):
|
671 | 8ab593ee | Carles | """
|
672 | 8ab593ee | Carles | Generate a rdkit molobj from atoms, coordinates and a total_charge.
|
673 | 8ab593ee | Carles |
|
674 | 8ab593ee | Carles | args:
|
675 | 8ab593ee | Carles | atoms - list of atom types (int)
|
676 | 8ab593ee | Carles | coordinates - 3xN Cartesian coordinates
|
677 | 8ab593ee | Carles | charge - total charge of the system (default: 0)
|
678 | 8ab593ee | Carles |
|
679 | 8ab593ee | Carles | optional:
|
680 | 8ab593ee | Carles | allow_charged_fragments - alternatively radicals are made
|
681 | 8ab593ee | Carles | use_graph - use graph (networkx)
|
682 | 8ab593ee | Carles | use_huckel - Use Huckel method for atom connectivity prediction
|
683 | 8ab593ee | Carles | embed_chiral - embed chiral information to the molecule
|
684 | 8ab593ee | Carles |
|
685 | 8ab593ee | Carles | returns:
|
686 | 8ab593ee | Carles | mol - rdkit molobj
|
687 | 8ab593ee | Carles |
|
688 | 8ab593ee | Carles | """
|
689 | 8ab593ee | Carles | |
690 | 8ab593ee | Carles | # Get atom connectivity (AC) matrix, list of atomic numbers, molecular charge,
|
691 | 8ab593ee | Carles | # and mol object with no connectivity information
|
692 | 5bfea9b9 | Carles | # print("atoms variable: ", atoms)
|
693 | 8ab593ee | Carles | AC, mol = xyz2AC(atoms, coordinates, charge, use_huckel=use_huckel) |
694 | 8ab593ee | Carles | |
695 | 8ab593ee | Carles | # Convert AC to bond order matrix and add connectivity and charge info to
|
696 | 8ab593ee | Carles | # mol object
|
697 | 8ab593ee | Carles | new_mol = AC2mol(mol, AC, atoms, charge, |
698 | 8ab593ee | Carles | allow_charged_fragments=allow_charged_fragments, |
699 | 8ab593ee | Carles | use_graph=use_graph) |
700 | 8ab593ee | Carles | |
701 | 8ab593ee | Carles | # Check for stereocenters and chiral centers
|
702 | 8ab593ee | Carles | if embed_chiral:
|
703 | 8ab593ee | Carles | chiral_stereo_check(new_mol) |
704 | 8ab593ee | Carles | |
705 | 8ab593ee | Carles | return new_mol
|
706 | 8ab593ee | Carles | |
707 | 8ab593ee | Carles | |
708 | 8ab593ee | Carles | def main(): |
709 | 8ab593ee | Carles | |
710 | 8ab593ee | Carles | |
711 | 8ab593ee | Carles | return
|
712 | 8ab593ee | Carles | |
713 | 8ab593ee | Carles | |
714 | 8ab593ee | Carles | if __name__ == "__main__": |
715 | 8ab593ee | Carles | |
716 | 8ab593ee | Carles | import argparse |
717 | 8ab593ee | Carles | |
718 | 8ab593ee | Carles | parser = argparse.ArgumentParser() # usage='%(prog)s [options] molecule.xyz')
|
719 | 8ab593ee | Carles | parser.add_argument('structure', metavar='structure', type=str) |
720 | 8ab593ee | Carles | parser.add_argument('-s', '--sdf', |
721 | 8ab593ee | Carles | action="store_true",
|
722 | 8ab593ee | Carles | help="Dump sdf file")
|
723 | 8ab593ee | Carles | parser.add_argument('--ignore-chiral',
|
724 | 8ab593ee | Carles | action="store_true",
|
725 | 8ab593ee | Carles | help="Ignore chiral centers")
|
726 | 8ab593ee | Carles | parser.add_argument('--no-charged-fragments',
|
727 | 8ab593ee | Carles | action="store_true",
|
728 | 8ab593ee | Carles | help="Allow radicals to be made")
|
729 | 8ab593ee | Carles | parser.add_argument('--no-graph',
|
730 | 8ab593ee | Carles | action="store_true",
|
731 | 8ab593ee | Carles | help="Run xyz2mol without networkx dependencies")
|
732 | 8ab593ee | Carles | |
733 | 8ab593ee | Carles | # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
|
734 | 8ab593ee | Carles | # otherwise van der Waals radii are used
|
735 | 8ab593ee | Carles | parser.add_argument('--use-huckel',
|
736 | 8ab593ee | Carles | action="store_true",
|
737 | 8ab593ee | Carles | help="Use Huckel method for atom connectivity")
|
738 | 8ab593ee | Carles | parser.add_argument('-o', '--output-format', |
739 | 8ab593ee | Carles | action="store",
|
740 | 8ab593ee | Carles | type=str,
|
741 | 8ab593ee | Carles | help="Output format [smiles,sdf] (default=sdf)")
|
742 | 8ab593ee | Carles | parser.add_argument('-c', '--charge', |
743 | 8ab593ee | Carles | action="store",
|
744 | 8ab593ee | Carles | metavar="int",
|
745 | 8ab593ee | Carles | type=int,
|
746 | 8ab593ee | Carles | help="Total charge of the system")
|
747 | 8ab593ee | Carles | |
748 | 8ab593ee | Carles | args = parser.parse_args() |
749 | 8ab593ee | Carles | |
750 | 8ab593ee | Carles | # read xyz file
|
751 | 8ab593ee | Carles | filename = args.structure |
752 | 8ab593ee | Carles | |
753 | 8ab593ee | Carles | # allow for charged fragments, alternatively radicals are made
|
754 | 8ab593ee | Carles | charged_fragments = not args.no_charged_fragments
|
755 | 8ab593ee | Carles | |
756 | 8ab593ee | Carles | # quick is faster for large systems but requires networkx
|
757 | 8ab593ee | Carles | # if you don't want to install networkx set quick=False and
|
758 | 8ab593ee | Carles | # uncomment 'import networkx as nx' at the top of the file
|
759 | 8ab593ee | Carles | quick = not args.no_graph
|
760 | 8ab593ee | Carles | |
761 | 8ab593ee | Carles | # chiral comment
|
762 | 8ab593ee | Carles | embed_chiral = not args.ignore_chiral
|
763 | 8ab593ee | Carles | |
764 | 8ab593ee | Carles | # read atoms and coordinates. Try to find the charge
|
765 | 8ab593ee | Carles | atoms, charge, xyz_coordinates = read_xyz_file(filename) |
766 | 8ab593ee | Carles | |
767 | 8ab593ee | Carles | # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
|
768 | 8ab593ee | Carles | # otherwise van der Waals radii are used
|
769 | 8ab593ee | Carles | use_huckel = args.use_huckel |
770 | 8ab593ee | Carles | |
771 | 8ab593ee | Carles | # if explicit charge from args, set it
|
772 | 8ab593ee | Carles | if args.charge is not None: |
773 | 8ab593ee | Carles | charge = int(args.charge)
|
774 | 8ab593ee | Carles | |
775 | 8ab593ee | Carles | # Get the molobj
|
776 | 8ab593ee | Carles | mol = xyz2mol(atoms, xyz_coordinates, |
777 | 8ab593ee | Carles | charge=charge, |
778 | 8ab593ee | Carles | use_graph=quick, |
779 | 8ab593ee | Carles | allow_charged_fragments=charged_fragments, |
780 | 8ab593ee | Carles | embed_chiral=embed_chiral, |
781 | 8ab593ee | Carles | use_huckel=use_huckel) |
782 | 8ab593ee | Carles | |
783 | 8ab593ee | Carles | # Print output
|
784 | 8ab593ee | Carles | if args.output_format == "sdf": |
785 | 8ab593ee | Carles | txt = Chem.MolToMolBlock(mol) |
786 | 8ab593ee | Carles | print(txt) |
787 | 8ab593ee | Carles | |
788 | 8ab593ee | Carles | else:
|
789 | 8ab593ee | Carles | # Canonical hack
|
790 | 8ab593ee | Carles | isomeric_smiles = not args.ignore_chiral
|
791 | 8ab593ee | Carles | smiles = Chem.MolToSmiles(mol, isomericSmiles=isomeric_smiles) |
792 | 8ab593ee | Carles | m = Chem.MolFromSmiles(smiles) |
793 | 8ab593ee | Carles | smiles = Chem.MolToSmiles(m, isomericSmiles=isomeric_smiles) |
794 | 8ab593ee | Carles | print(smiles) |