Révision 8ab593ee

b/modules/read_coords.py
1
def read_coords(requirement, coord_file):
2
    from ase.io.formats import filetype
3

  
4
    req_vals = ['rdkit', 'ase']
5
    file_type_vals = ['xyz', 'mol']
6
    assert requirement in req_vals, ValueError('Not an adequate requirement.\n'
7
                                    f'adequate requirements: {req_vals}')
8
    assert filetype(coord_file) in file_type_vals, TypeError('The file formnat '
9
                                                             'is not supported')
10

  
11
    if requirement == 'rdkit':
12
        if filetype(coord_file) == 'xyz':
13
            from xyz2mol import xyz2mol, read_xyz_file
14
            atoms, charge, xyz_coordinates = read_xyz_file(coord_file)
15
            rd_mol_obj = xyz2mol(atoms, xyz_coordinates, charge=charge)
16
            return rd_mol_obj
17
        elif filetype(coord_file) == 'mol':
18
            from rdkit.Chem import MolFromMolFile
19
            return MolFromMolFile(coord_file)
20

  
21
    if requirement == 'ase':
22
        import ase.io
23
        return ase.io.read(coord_file)
b/modules/xyz2mol.py
1
"""
2
Module for generating rdkit molobj/smiles/molecular graph from free atoms
3

  
4
Implementation by Jan H. Jensen, based on the paper
5

  
6
    Yeonjoon Kim and Woo Youn Kim
7
    "Universal Structure Conversion Method for Organic Molecules: From Atomic Connectivity
8
    to Three-Dimensional Geometry"
9
    Bull. Korean Chem. Soc. 2015, Vol. 36, 1769-1777
10
    DOI: 10.1002/bkcs.10334
11

  
12
"""
13

  
14
import copy
15
import itertools
16

  
17
from rdkit.Chem import rdmolops
18
try:
19
    from rdkit.Chem import rdEHTTools #requires RDKit 2019.9.1 or later
20
except ImportError:
21
    rdEHTTools = None
22

  
23
from collections import defaultdict
24

  
25
import numpy as np
26
import networkx as nx
27

  
28
from rdkit import Chem
29
from rdkit.Chem import AllChem, rdmolops
30

  
31
global __ATOM_LIST__
32
__ATOM_LIST__ = \
33
    ['h',  'he',
34
     'li', 'be', 'b',  'c',  'n',  'o',  'f',  'ne',
35
     'na', 'mg', 'al', 'si', 'p',  's',  'cl', 'ar',
36
     'k',  'ca', 'sc', 'ti', 'v ', 'cr', 'mn', 'fe', 'co', 'ni', 'cu',
37
     'zn', 'ga', 'ge', 'as', 'se', 'br', 'kr',
38
     'rb', 'sr', 'y',  'zr', 'nb', 'mo', 'tc', 'ru', 'rh', 'pd', 'ag',
39
     'cd', 'in', 'sn', 'sb', 'te', 'i',  'xe',
40
     'cs', 'ba', 'la', 'ce', 'pr', 'nd', 'pm', 'sm', 'eu', 'gd', 'tb', 'dy',
41
     'ho', 'er', 'tm', 'yb', 'lu', 'hf', 'ta', 'w',  're', 'os', 'ir', 'pt',
42
     'au', 'hg', 'tl', 'pb', 'bi', 'po', 'at', 'rn',
43
     'fr', 'ra', 'ac', 'th', 'pa', 'u',  'np', 'pu']
44

  
45

  
46
global atomic_valence
47
global atomic_valence_electrons
48

  
49
atomic_valence = defaultdict(list)
50
atomic_valence[1] = [1]
51
atomic_valence[6] = [4]
52
atomic_valence[7] = [3,4]
53
atomic_valence[8] = [2,1]
54
atomic_valence[9] = [1]
55
atomic_valence[14] = [4]
56
atomic_valence[15] = [5,3] #[5,4,3]
57
atomic_valence[16] = [6,3,2] #[6,4,2]
58
atomic_valence[17] = [1]
59
atomic_valence[32] = [4]
60
atomic_valence[35] = [1]
61
atomic_valence[53] = [1]
62

  
63
atomic_valence_electrons = {}
64
atomic_valence_electrons[1] = 1
65
atomic_valence_electrons[6] = 4
66
atomic_valence_electrons[7] = 5
67
atomic_valence_electrons[8] = 6
68
atomic_valence_electrons[9] = 7
69
atomic_valence_electrons[14] = 4
70
atomic_valence_electrons[15] = 5
71
atomic_valence_electrons[16] = 6
72
atomic_valence_electrons[17] = 7
73
atomic_valence_electrons[32] = 4
74
atomic_valence_electrons[35] = 7
75
atomic_valence_electrons[53] = 7
76

  
77

  
78
def str_atom(atom):
79
    """
80
    convert integer atom to string atom
81
    """
82
    global __ATOM_LIST__
83
    atom = __ATOM_LIST__[atom - 1]
84
    return atom
85

  
86

  
87
def int_atom(atom):
88
    """
89
    convert str atom to integer atom
90
    """
91
    global __ATOM_LIST__
92
    print(atom)
93
    atom = atom.lower()
94
    return __ATOM_LIST__.index(atom) + 1
95

  
96

  
97
def get_UA(maxValence_list, valence_list):
98
    """
99
    """
100
    UA = []
101
    DU = []
102
    for i, (maxValence, valence) in enumerate(zip(maxValence_list, valence_list)):
103
        if not maxValence - valence > 0:
104
            continue
105
        UA.append(i)
106
        DU.append(maxValence - valence)
107
    return UA, DU
108

  
109

  
110
def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True):
111
    """
112
    """
113
    BO = AC.copy()
114
    DU_save = []
115

  
116
    while DU_save != DU:
117
        for i, j in UA_pairs:
118
            BO[i, j] += 1
119
            BO[j, i] += 1
120

  
121
        BO_valence = list(BO.sum(axis=1))
122
        DU_save = copy.copy(DU)
123
        UA, DU = get_UA(valences, BO_valence)
124
        UA_pairs = get_UA_pairs(UA, AC, use_graph=use_graph)[0]
125

  
126
    return BO
127

  
128

  
129
def valences_not_too_large(BO, valences):
130
    """
131
    """
132
    number_of_bonds_list = BO.sum(axis=1)
133
    for valence, number_of_bonds in zip(valences, number_of_bonds_list):
134
        if number_of_bonds > valence:
135
            return False
136

  
137
    return True
138

  
139

  
140
def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valances,
141
    allow_charged_fragments=True):
142
    """
143
    Sanity of bond-orders
144

  
145
    args:
146
        BO -
147
        AC -
148
        charge -
149
        DU - 
150

  
151

  
152
    optional
153
        allow_charges_fragments - 
154

  
155

  
156
    returns:
157
        boolean - true of molecule is OK, false if not
158
    """
159

  
160
    if not valences_not_too_large(BO, valances):
161
        return False
162

  
163
    # total charge
164
    Q = 0
165

  
166
    # charge fragment list
167
    q_list = []
168

  
169
    if allow_charged_fragments:
170

  
171
        BO_valences = list(BO.sum(axis=1))
172
        for i, atom in enumerate(atoms):
173
            q = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])
174
            Q += q
175
            if atom == 6:
176
                number_of_single_bonds_to_C = list(BO[i, :]).count(1)
177
                if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
178
                    Q += 1
179
                    q = 2
180
                if number_of_single_bonds_to_C == 3 and Q + 1 < charge:
181
                    Q += 2
182
                    q = 1
183

  
184
            if q != 0:
185
                q_list.append(q)
186

  
187
    check_sum = (BO - AC).sum() == sum(DU)
188
    check_charge = charge == Q
189
    # check_len = len(q_list) <= abs(charge)
190

  
191
    if check_sum and check_charge:
192
        return True
193

  
194
    return False
195

  
196

  
197
def get_atomic_charge(atom, atomic_valence_electrons, BO_valence):
198
    """
199
    """
200

  
201
    if atom == 1:
202
        charge = 1 - BO_valence
203
    elif atom == 5:
204
        charge = 3 - BO_valence
205
    elif atom == 15 and BO_valence == 5:
206
        charge = 0
207
    elif atom == 16 and BO_valence == 6:
208
        charge = 0
209
    else:
210
        charge = atomic_valence_electrons - 8 + BO_valence
211

  
212
    return charge
213

  
214

  
215
def clean_charges(mol):
216
    """
217
    This hack should not be needed anymore, but is kept just in case
218

  
219
    """
220

  
221
    Chem.SanitizeMol(mol)
222
    #rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
223
    #              '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
224
    #              '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
225
    #              '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
226
    #              '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
227
    #              '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']
228

  
229
    rxn_smarts = ['[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][CX3-,NX3-:5][#6,#7:6]1=[#6,#7:7]>>'
230
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][-0,-0:5]=[#6,#7:6]1[#6-,#7-:7]',
231
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3](=[#6,#7:4])[#6,#7:5]=[#6,#7:6][CX3-,NX3-:7]1>>'
232
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3]([#6-,#7-:4])=[#6,#7:5][#6,#7:6]=[-0,-0:7]1']
233

  
234
    fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False)
235

  
236
    for i, fragment in enumerate(fragments):
237
        for smarts in rxn_smarts:
238
            patt = Chem.MolFromSmarts(smarts.split(">>")[0])
239
            while fragment.HasSubstructMatch(patt):
240
                rxn = AllChem.ReactionFromSmarts(smarts)
241
                ps = rxn.RunReactants((fragment,))
242
                fragment = ps[0][0]
243
                Chem.SanitizeMol(fragment)
244
        if i == 0:
245
            mol = fragment
246
        else:
247
            mol = Chem.CombineMols(mol, fragment)
248

  
249
    return mol
250

  
251

  
252
def BO2mol(mol, BO_matrix, atoms, atomic_valence_electrons,
253
           mol_charge, allow_charged_fragments=True):
254
    """
255
    based on code written by Paolo Toscani
256

  
257
    From bond order, atoms, valence structure and total charge, generate an
258
    rdkit molecule.
259

  
260
    args:
261
        mol - rdkit molecule
262
        BO_matrix - bond order matrix of molecule
263
        atoms - list of integer atomic symbols
264
        atomic_valence_electrons -
265
        mol_charge - total charge of molecule
266

  
267
    optional:
268
        allow_charged_fragments - bool - allow charged fragments
269

  
270
    returns
271
        mol - updated rdkit molecule with bond connectivity
272

  
273
    """
274

  
275
    l = len(BO_matrix)
276
    l2 = len(atoms)
277
    BO_valences = list(BO_matrix.sum(axis=1))
278

  
279
    if (l != l2):
280
        raise RuntimeError('sizes of adjMat ({0:d}) and Atoms {1:d} differ'.format(l, l2))
281

  
282
    rwMol = Chem.RWMol(mol)
283

  
284
    bondTypeDict = {
285
        1: Chem.BondType.SINGLE,
286
        2: Chem.BondType.DOUBLE,
287
        3: Chem.BondType.TRIPLE
288
    }
289

  
290
    for i in range(l):
291
        for j in range(i + 1, l):
292
            bo = int(round(BO_matrix[i, j]))
293
            if (bo == 0):
294
                continue
295
            bt = bondTypeDict.get(bo, Chem.BondType.SINGLE)
296
            rwMol.AddBond(i, j, bt)
297

  
298
    mol = rwMol.GetMol()
299

  
300
    if allow_charged_fragments:
301
        mol = set_atomic_charges(
302
            mol,
303
            atoms,
304
            atomic_valence_electrons,
305
            BO_valences,
306
            BO_matrix,
307
            mol_charge)
308
    else:
309
        mol = set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences)
310

  
311
    return mol
312

  
313

  
314
def set_atomic_charges(mol, atoms, atomic_valence_electrons,
315
                       BO_valences, BO_matrix, mol_charge):
316
    """
317
    """
318
    q = 0
319
    for i, atom in enumerate(atoms):
320
        a = mol.GetAtomWithIdx(i)
321
        charge = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])
322
        q += charge
323
        if atom == 6:
324
            number_of_single_bonds_to_C = list(BO_matrix[i, :]).count(1)
325
            if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
326
                q += 1
327
                charge = 0
328
            if number_of_single_bonds_to_C == 3 and q + 1 < mol_charge:
329
                q += 2
330
                charge = 1
331

  
332
        if (abs(charge) > 0):
333
            a.SetFormalCharge(int(charge))
334

  
335
    mol = clean_charges(mol)
336

  
337
    return mol
338

  
339

  
340
def set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences):
341
    """
342

  
343
    The number of radical electrons = absolute atomic charge
344

  
345
    """
346
    for i, atom in enumerate(atoms):
347
        a = mol.GetAtomWithIdx(i)
348
        charge = get_atomic_charge(
349
            atom,
350
            atomic_valence_electrons[atom],
351
            BO_valences[i])
352

  
353
        if (abs(charge) > 0):
354
            a.SetNumRadicalElectrons(abs(int(charge)))
355

  
356
    return mol
357

  
358

  
359
def get_bonds(UA, AC):
360
    """
361

  
362
    """
363
    bonds = []
364

  
365
    for k, i in enumerate(UA):
366
        for j in UA[k + 1:]:
367
            if AC[i, j] == 1:
368
                bonds.append(tuple(sorted([i, j])))
369

  
370
    return bonds
371

  
372

  
373
def get_UA_pairs(UA, AC, use_graph=True):
374
    """
375

  
376
    """
377

  
378
    bonds = get_bonds(UA, AC)
379

  
380
    if len(bonds) == 0:
381
        return [()]
382

  
383
    if use_graph:
384
        G = nx.Graph()
385
        G.add_edges_from(bonds)
386
        UA_pairs = [list(nx.max_weight_matching(G))]
387
        return UA_pairs
388

  
389
    max_atoms_in_combo = 0
390
    UA_pairs = [()]
391
    for combo in list(itertools.combinations(bonds, int(len(UA) / 2))):
392
        flat_list = [item for sublist in combo for item in sublist]
393
        atoms_in_combo = len(set(flat_list))
394
        if atoms_in_combo > max_atoms_in_combo:
395
            max_atoms_in_combo = atoms_in_combo
396
            UA_pairs = [combo]
397

  
398
        elif atoms_in_combo == max_atoms_in_combo:
399
            UA_pairs.append(combo)
400

  
401
    return UA_pairs
402

  
403

  
404
def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
405
    """
406

  
407
    implemenation of algorithm shown in Figure 2
408

  
409
    UA: unsaturated atoms
410

  
411
    DU: degree of unsaturation (u matrix in Figure)
412

  
413
    best_BO: Bcurr in Figure
414

  
415
    """
416

  
417
    global atomic_valence
418
    global atomic_valence_electrons
419

  
420
    # make a list of valences, e.g. for CO: [[4],[2,1]]
421
    valences_list_of_lists = []
422
    AC_valence = list(AC.sum(axis=1))
423
    for atomicNum in atoms:
424
        valences_list_of_lists.append(atomic_valence[atomicNum])
425

  
426
    # convert [[4],[2,1]] to [[4,2],[4,1]]
427
    valences_list = itertools.product(*valences_list_of_lists)
428

  
429
    best_BO = AC.copy()
430

  
431
    for valences in valences_list:
432

  
433
        UA, DU_from_AC = get_UA(valences, AC_valence)
434

  
435
        check_len = (len(UA) == 0)
436
        if check_len:
437
            check_bo = BO_is_OK(AC, AC, charge, DU_from_AC,
438
                atomic_valence_electrons, atoms, valences,
439
                allow_charged_fragments=allow_charged_fragments)
440
        else:
441
            check_bo = None
442

  
443
        if check_len and check_bo:
444
            return AC, atomic_valence_electrons
445

  
446
        UA_pairs_list = get_UA_pairs(UA, AC, use_graph=use_graph)
447
        for UA_pairs in UA_pairs_list:
448
            BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph)
449
            status = BO_is_OK(BO, AC, charge, DU_from_AC,
450
                        atomic_valence_electrons, atoms, valences,
451
                        allow_charged_fragments=allow_charged_fragments)
452

  
453
            if status:
454
                return BO, atomic_valence_electrons
455

  
456
            elif BO.sum() >= best_BO.sum() and valences_not_too_large(BO, valences):
457
                best_BO = BO.copy()
458

  
459
    return best_BO, atomic_valence_electrons
460

  
461

  
462
def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
463
    """
464
    """
465

  
466
    # convert AC matrix to bond order (BO) matrix
467
    BO, atomic_valence_electrons = AC2BO(
468
        AC,
469
        atoms,
470
        charge,
471
        allow_charged_fragments=allow_charged_fragments,
472
        use_graph=use_graph)
473

  
474
    # add BO connectivity and charge info to mol object
475
    mol = BO2mol(
476
        mol,
477
        BO,
478
        atoms,
479
        atomic_valence_electrons,
480
        charge,
481
        allow_charged_fragments=allow_charged_fragments)
482

  
483
    return mol
484

  
485

  
486
def get_proto_mol(atoms):
487
    """
488
    """
489
    mol = Chem.MolFromSmarts("[#" + str(atoms[0]) + "]")
490
    rwMol = Chem.RWMol(mol)
491
    for i in range(1, len(atoms)):
492
        a = Chem.Atom(atoms[i])
493
        rwMol.AddAtom(a)
494

  
495
    mol = rwMol.GetMol()
496

  
497
    return mol
498

  
499

  
500
def read_xyz_file(filename, look_for_charge=True):
501
    """
502
    """
503

  
504
    atomic_symbols = []
505
    xyz_coordinates = []
506
    charge = 0
507
    title = ""
508

  
509
    with open(filename, "r") as file:
510
        for line_number, line in enumerate(file):
511
            if line_number == 0:
512
                num_atoms = int(line)
513
            elif line_number == 1:
514
                title = line
515
                if "charge=" in line: #TODO: More flexible charge declaration
516
                    charge = int(line.split("=")[1])
517
            else:
518
                atomic_symbol, x, y, z = line.split()
519
                atomic_symbols.append(atomic_symbol)
520
                xyz_coordinates.append([float(x), float(y), float(z)])
521

  
522
    atoms = [int_atom(atom) for atom in atomic_symbols]
523

  
524
    return atoms, charge, xyz_coordinates
525

  
526

  
527
def xyz2AC(atoms, xyz, charge, use_huckel=False):
528
    """
529

  
530
    atoms and coordinates to atom connectivity (AC)
531

  
532
    args:
533
        atoms - int atom types
534
        xyz - coordinates
535
        charge - molecule charge
536

  
537
    optional:
538
        use_huckel - Use Huckel method for atom connecitivty
539

  
540
    returns
541
        ac - atom connectivity matrix
542
        mol - rdkit molecule
543

  
544
    """
545

  
546
    if use_huckel:
547
        return xyz2AC_huckel(atoms, xyz, charge)
548
    else:
549
        return xyz2AC_vdW(atoms, xyz)
550

  
551

  
552
def xyz2AC_vdW(atoms, xyz):
553

  
554
    # Get mol template
555
    mol = get_proto_mol(atoms)
556

  
557
    # Set coordinates
558
    conf = Chem.Conformer(mol.GetNumAtoms())
559
    for i in range(mol.GetNumAtoms()):
560
        conf.SetAtomPosition(i, (xyz[i][0], xyz[i][1], xyz[i][2]))
561
    mol.AddConformer(conf)
562

  
563
    AC = get_AC(mol)
564

  
565
    return AC, mol
566

  
567

  
568
def get_AC(mol, covalent_factor=1.3):
569
    """
570

  
571
    Generate adjacent matrix from atoms and coordinates.
572

  
573
    AC is a (num_atoms, num_atoms) matrix with 1 being covalent bond and 0 is not
574

  
575

  
576
    covalent_factor - 1.3 is an arbitrary factor
577

  
578
    args:
579
        mol - rdkit molobj with 3D conformer
580

  
581
    optional
582
        covalent_factor - increase covalent bond length threshold with facto
583

  
584
    returns:
585
        AC - adjacent matrix
586

  
587
    """
588

  
589
    # Calculate distance matrix
590
    dMat = Chem.Get3DDistanceMatrix(mol)
591

  
592
    pt = Chem.GetPeriodicTable()
593
    num_atoms = mol.GetNumAtoms()
594
    AC = np.zeros((num_atoms, num_atoms), dtype=int)
595

  
596
    for i in range(num_atoms):
597
        a_i = mol.GetAtomWithIdx(i)
598
        Rcov_i = pt.GetRcovalent(a_i.GetAtomicNum()) * covalent_factor
599
        for j in range(i + 1, num_atoms):
600
            a_j = mol.GetAtomWithIdx(j)
601
            Rcov_j = pt.GetRcovalent(a_j.GetAtomicNum()) * covalent_factor
602
            if dMat[i, j] <= Rcov_i + Rcov_j:
603
                AC[i, j] = 1
604
                AC[j, i] = 1
605

  
606
    return AC
607

  
608

  
609
def xyz2AC_huckel(atomicNumList,xyz,charge):
610
    """
611

  
612
    args
613
        atomicNumList - atom type list
614
        xyz - coordinates
615
        charge - molecule charge
616

  
617
    returns
618
        ac - atom connectivity
619
        mol - rdkit molecule
620

  
621
    """
622
    mol = get_proto_mol(atomicNumList)
623

  
624
    conf = Chem.Conformer(mol.GetNumAtoms())
625
    for i in range(mol.GetNumAtoms()):
626
        conf.SetAtomPosition(i,(xyz[i][0],xyz[i][1],xyz[i][2]))
627
    mol.AddConformer(conf)
628

  
629
    num_atoms = len(atomicNumList)
630
    AC = np.zeros((num_atoms,num_atoms)).astype(int)
631

  
632
    mol_huckel = Chem.Mol(mol)
633
    mol_huckel.GetAtomWithIdx(0).SetFormalCharge(charge) #mol charge arbitrarily added to 1st atom    
634

  
635
    passed,result = rdEHTTools.RunMol(mol_huckel)
636
    opop = result.GetReducedOverlapPopulationMatrix()
637
    tri = np.zeros((num_atoms, num_atoms))
638
    tri[np.tril(np.ones((num_atoms, num_atoms), dtype=bool))] = opop #lower triangular to square matrix
639
    for i in range(num_atoms):
640
        for j in range(i+1,num_atoms):
641
            pair_pop = abs(tri[j,i])   
642
            if pair_pop >= 0.15: #arbitry cutoff for bond. May need adjustment
643
                AC[i,j] = 1
644
                AC[j,i] = 1
645

  
646
    return AC, mol
647

  
648

  
649
def chiral_stereo_check(mol):
650
    """
651
    Find and embed chiral information into the model based on the coordinates
652

  
653
    args:
654
        mol - rdkit molecule, with embeded conformer
655

  
656
    """
657
    Chem.SanitizeMol(mol)
658
    Chem.DetectBondStereochemistry(mol, -1)
659
    Chem.AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True)
660
    Chem.AssignAtomChiralTagsFromStructure(mol, -1)
661

  
662
    return
663

  
664

  
665
def xyz2mol(atoms, coordinates,
666
    charge=0,
667
    allow_charged_fragments=True,
668
    use_graph=True,
669
    use_huckel=False,
670
    embed_chiral=True):
671
    """
672
    Generate a rdkit molobj from atoms, coordinates and a total_charge.
673

  
674
    args:
675
        atoms - list of atom types (int)
676
        coordinates - 3xN Cartesian coordinates
677
        charge - total charge of the system (default: 0)
678

  
679
    optional:
680
        allow_charged_fragments - alternatively radicals are made
681
        use_graph - use graph (networkx)
682
        use_huckel - Use Huckel method for atom connectivity prediction
683
        embed_chiral - embed chiral information to the molecule
684

  
685
    returns:
686
        mol - rdkit molobj
687

  
688
    """
689

  
690
    # Get atom connectivity (AC) matrix, list of atomic numbers, molecular charge,
691
    # and mol object with no connectivity information
692
    print("atoms variable: ", atoms)
693
    AC, mol = xyz2AC(atoms, coordinates, charge, use_huckel=use_huckel)
694

  
695
    # Convert AC to bond order matrix and add connectivity and charge info to
696
    # mol object
697
    new_mol = AC2mol(mol, AC, atoms, charge,
698
        allow_charged_fragments=allow_charged_fragments,
699
        use_graph=use_graph)
700

  
701
    # Check for stereocenters and chiral centers
702
    if embed_chiral:
703
        chiral_stereo_check(new_mol)
704

  
705
    return new_mol
706

  
707

  
708
def main():
709

  
710

  
711
    return
712

  
713

  
714
if __name__ == "__main__":
715

  
716
    import argparse
717

  
718
    parser = argparse.ArgumentParser() # usage='%(prog)s [options] molecule.xyz')
719
    parser.add_argument('structure', metavar='structure', type=str)
720
    parser.add_argument('-s', '--sdf',
721
        action="store_true",
722
        help="Dump sdf file")
723
    parser.add_argument('--ignore-chiral',
724
        action="store_true",
725
        help="Ignore chiral centers")
726
    parser.add_argument('--no-charged-fragments',
727
        action="store_true",
728
        help="Allow radicals to be made")
729
    parser.add_argument('--no-graph',
730
        action="store_true",
731
        help="Run xyz2mol without networkx dependencies")
732

  
733
    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
734
    # otherwise van der Waals radii are used
735
    parser.add_argument('--use-huckel',
736
        action="store_true",
737
        help="Use Huckel method for atom connectivity")
738
    parser.add_argument('-o', '--output-format',
739
        action="store",
740
        type=str,
741
        help="Output format [smiles,sdf] (default=sdf)")
742
    parser.add_argument('-c', '--charge',
743
        action="store",
744
        metavar="int",
745
        type=int,
746
        help="Total charge of the system")
747

  
748
    args = parser.parse_args()
749

  
750
    # read xyz file
751
    filename = args.structure
752

  
753
    # allow for charged fragments, alternatively radicals are made
754
    charged_fragments = not args.no_charged_fragments
755

  
756
    # quick is faster for large systems but requires networkx
757
    # if you don't want to install networkx set quick=False and
758
    # uncomment 'import networkx as nx' at the top of the file
759
    quick = not args.no_graph
760

  
761
    # chiral comment
762
    embed_chiral = not args.ignore_chiral
763

  
764
    # read atoms and coordinates. Try to find the charge
765
    atoms, charge, xyz_coordinates = read_xyz_file(filename)
766

  
767
    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
768
    # otherwise van der Waals radii are used
769
    use_huckel = args.use_huckel
770

  
771
    # if explicit charge from args, set it
772
    if args.charge is not None:
773
        charge = int(args.charge)
774

  
775
    # Get the molobj
776
    mol = xyz2mol(atoms, xyz_coordinates,
777
        charge=charge,
778
        use_graph=quick,
779
        allow_charged_fragments=charged_fragments,
780
        embed_chiral=embed_chiral,
781
        use_huckel=use_huckel)
782

  
783
    # Print output
784
    if args.output_format == "sdf":
785
        txt = Chem.MolToMolBlock(mol)
786
        print(txt)
787

  
788
    else:
789
        # Canonical hack
790
        isomeric_smiles = not args.ignore_chiral
791
        smiles = Chem.MolToSmiles(mol, isomericSmiles=isomeric_smiles)
792
        m = Chem.MolFromSmiles(smiles)
793
        smiles = Chem.MolToSmiles(m, isomericSmiles=isomeric_smiles)
794
        print(smiles)

Formats disponibles : Unified diff