Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / xyz2mol.py @ 8ab593ee

Historique | Voir | Annoter | Télécharger (21,01 ko)

1
"""
2
Module for generating rdkit molobj/smiles/molecular graph from free atoms
3

4
Implementation by Jan H. Jensen, based on the paper
5

6
    Yeonjoon Kim and Woo Youn Kim
7
    "Universal Structure Conversion Method for Organic Molecules: From Atomic Connectivity
8
    to Three-Dimensional Geometry"
9
    Bull. Korean Chem. Soc. 2015, Vol. 36, 1769-1777
10
    DOI: 10.1002/bkcs.10334
11

12
"""
13

    
14
import copy
15
import itertools
16

    
17
from rdkit.Chem import rdmolops
18
try:
19
    from rdkit.Chem import rdEHTTools #requires RDKit 2019.9.1 or later
20
except ImportError:
21
    rdEHTTools = None
22

    
23
from collections import defaultdict
24

    
25
import numpy as np
26
import networkx as nx
27

    
28
from rdkit import Chem
29
from rdkit.Chem import AllChem, rdmolops
30

    
31
global __ATOM_LIST__
32
__ATOM_LIST__ = \
33
    ['h',  'he',
34
     'li', 'be', 'b',  'c',  'n',  'o',  'f',  'ne',
35
     'na', 'mg', 'al', 'si', 'p',  's',  'cl', 'ar',
36
     'k',  'ca', 'sc', 'ti', 'v ', 'cr', 'mn', 'fe', 'co', 'ni', 'cu',
37
     'zn', 'ga', 'ge', 'as', 'se', 'br', 'kr',
38
     'rb', 'sr', 'y',  'zr', 'nb', 'mo', 'tc', 'ru', 'rh', 'pd', 'ag',
39
     'cd', 'in', 'sn', 'sb', 'te', 'i',  'xe',
40
     'cs', 'ba', 'la', 'ce', 'pr', 'nd', 'pm', 'sm', 'eu', 'gd', 'tb', 'dy',
41
     'ho', 'er', 'tm', 'yb', 'lu', 'hf', 'ta', 'w',  're', 'os', 'ir', 'pt',
42
     'au', 'hg', 'tl', 'pb', 'bi', 'po', 'at', 'rn',
43
     'fr', 'ra', 'ac', 'th', 'pa', 'u',  'np', 'pu']
44

    
45

    
46
global atomic_valence
47
global atomic_valence_electrons
48

    
49
atomic_valence = defaultdict(list)
50
atomic_valence[1] = [1]
51
atomic_valence[6] = [4]
52
atomic_valence[7] = [3,4]
53
atomic_valence[8] = [2,1]
54
atomic_valence[9] = [1]
55
atomic_valence[14] = [4]
56
atomic_valence[15] = [5,3] #[5,4,3]
57
atomic_valence[16] = [6,3,2] #[6,4,2]
58
atomic_valence[17] = [1]
59
atomic_valence[32] = [4]
60
atomic_valence[35] = [1]
61
atomic_valence[53] = [1]
62

    
63
atomic_valence_electrons = {}
64
atomic_valence_electrons[1] = 1
65
atomic_valence_electrons[6] = 4
66
atomic_valence_electrons[7] = 5
67
atomic_valence_electrons[8] = 6
68
atomic_valence_electrons[9] = 7
69
atomic_valence_electrons[14] = 4
70
atomic_valence_electrons[15] = 5
71
atomic_valence_electrons[16] = 6
72
atomic_valence_electrons[17] = 7
73
atomic_valence_electrons[32] = 4
74
atomic_valence_electrons[35] = 7
75
atomic_valence_electrons[53] = 7
76

    
77

    
78
def str_atom(atom):
79
    """
80
    convert integer atom to string atom
81
    """
82
    global __ATOM_LIST__
83
    atom = __ATOM_LIST__[atom - 1]
84
    return atom
85

    
86

    
87
def int_atom(atom):
88
    """
89
    convert str atom to integer atom
90
    """
91
    global __ATOM_LIST__
92
    print(atom)
93
    atom = atom.lower()
94
    return __ATOM_LIST__.index(atom) + 1
95

    
96

    
97
def get_UA(maxValence_list, valence_list):
98
    """
99
    """
100
    UA = []
101
    DU = []
102
    for i, (maxValence, valence) in enumerate(zip(maxValence_list, valence_list)):
103
        if not maxValence - valence > 0:
104
            continue
105
        UA.append(i)
106
        DU.append(maxValence - valence)
107
    return UA, DU
108

    
109

    
110
def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True):
111
    """
112
    """
113
    BO = AC.copy()
114
    DU_save = []
115

    
116
    while DU_save != DU:
117
        for i, j in UA_pairs:
118
            BO[i, j] += 1
119
            BO[j, i] += 1
120

    
121
        BO_valence = list(BO.sum(axis=1))
122
        DU_save = copy.copy(DU)
123
        UA, DU = get_UA(valences, BO_valence)
124
        UA_pairs = get_UA_pairs(UA, AC, use_graph=use_graph)[0]
125

    
126
    return BO
127

    
128

    
129
def valences_not_too_large(BO, valences):
130
    """
131
    """
132
    number_of_bonds_list = BO.sum(axis=1)
133
    for valence, number_of_bonds in zip(valences, number_of_bonds_list):
134
        if number_of_bonds > valence:
135
            return False
136

    
137
    return True
138

    
139

    
140
def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valances,
141
    allow_charged_fragments=True):
142
    """
143
    Sanity of bond-orders
144

145
    args:
146
        BO -
147
        AC -
148
        charge -
149
        DU - 
150

151

152
    optional
153
        allow_charges_fragments - 
154

155

156
    returns:
157
        boolean - true of molecule is OK, false if not
158
    """
159

    
160
    if not valences_not_too_large(BO, valances):
161
        return False
162

    
163
    # total charge
164
    Q = 0
165

    
166
    # charge fragment list
167
    q_list = []
168

    
169
    if allow_charged_fragments:
170

    
171
        BO_valences = list(BO.sum(axis=1))
172
        for i, atom in enumerate(atoms):
173
            q = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])
174
            Q += q
175
            if atom == 6:
176
                number_of_single_bonds_to_C = list(BO[i, :]).count(1)
177
                if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
178
                    Q += 1
179
                    q = 2
180
                if number_of_single_bonds_to_C == 3 and Q + 1 < charge:
181
                    Q += 2
182
                    q = 1
183

    
184
            if q != 0:
185
                q_list.append(q)
186

    
187
    check_sum = (BO - AC).sum() == sum(DU)
188
    check_charge = charge == Q
189
    # check_len = len(q_list) <= abs(charge)
190

    
191
    if check_sum and check_charge:
192
        return True
193

    
194
    return False
195

    
196

    
197
def get_atomic_charge(atom, atomic_valence_electrons, BO_valence):
198
    """
199
    """
200

    
201
    if atom == 1:
202
        charge = 1 - BO_valence
203
    elif atom == 5:
204
        charge = 3 - BO_valence
205
    elif atom == 15 and BO_valence == 5:
206
        charge = 0
207
    elif atom == 16 and BO_valence == 6:
208
        charge = 0
209
    else:
210
        charge = atomic_valence_electrons - 8 + BO_valence
211

    
212
    return charge
213

    
214

    
215
def clean_charges(mol):
216
    """
217
    This hack should not be needed anymore, but is kept just in case
218

219
    """
220

    
221
    Chem.SanitizeMol(mol)
222
    #rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
223
    #              '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
224
    #              '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
225
    #              '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
226
    #              '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
227
    #              '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']
228

    
229
    rxn_smarts = ['[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][CX3-,NX3-:5][#6,#7:6]1=[#6,#7:7]>>'
230
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][-0,-0:5]=[#6,#7:6]1[#6-,#7-:7]',
231
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3](=[#6,#7:4])[#6,#7:5]=[#6,#7:6][CX3-,NX3-:7]1>>'
232
                  '[#6,#7:1]1=[#6,#7:2][#6,#7:3]([#6-,#7-:4])=[#6,#7:5][#6,#7:6]=[-0,-0:7]1']
233

    
234
    fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False)
235

    
236
    for i, fragment in enumerate(fragments):
237
        for smarts in rxn_smarts:
238
            patt = Chem.MolFromSmarts(smarts.split(">>")[0])
239
            while fragment.HasSubstructMatch(patt):
240
                rxn = AllChem.ReactionFromSmarts(smarts)
241
                ps = rxn.RunReactants((fragment,))
242
                fragment = ps[0][0]
243
                Chem.SanitizeMol(fragment)
244
        if i == 0:
245
            mol = fragment
246
        else:
247
            mol = Chem.CombineMols(mol, fragment)
248

    
249
    return mol
250

    
251

    
252
def BO2mol(mol, BO_matrix, atoms, atomic_valence_electrons,
253
           mol_charge, allow_charged_fragments=True):
254
    """
255
    based on code written by Paolo Toscani
256

257
    From bond order, atoms, valence structure and total charge, generate an
258
    rdkit molecule.
259

260
    args:
261
        mol - rdkit molecule
262
        BO_matrix - bond order matrix of molecule
263
        atoms - list of integer atomic symbols
264
        atomic_valence_electrons -
265
        mol_charge - total charge of molecule
266

267
    optional:
268
        allow_charged_fragments - bool - allow charged fragments
269

270
    returns
271
        mol - updated rdkit molecule with bond connectivity
272

273
    """
274

    
275
    l = len(BO_matrix)
276
    l2 = len(atoms)
277
    BO_valences = list(BO_matrix.sum(axis=1))
278

    
279
    if (l != l2):
280
        raise RuntimeError('sizes of adjMat ({0:d}) and Atoms {1:d} differ'.format(l, l2))
281

    
282
    rwMol = Chem.RWMol(mol)
283

    
284
    bondTypeDict = {
285
        1: Chem.BondType.SINGLE,
286
        2: Chem.BondType.DOUBLE,
287
        3: Chem.BondType.TRIPLE
288
    }
289

    
290
    for i in range(l):
291
        for j in range(i + 1, l):
292
            bo = int(round(BO_matrix[i, j]))
293
            if (bo == 0):
294
                continue
295
            bt = bondTypeDict.get(bo, Chem.BondType.SINGLE)
296
            rwMol.AddBond(i, j, bt)
297

    
298
    mol = rwMol.GetMol()
299

    
300
    if allow_charged_fragments:
301
        mol = set_atomic_charges(
302
            mol,
303
            atoms,
304
            atomic_valence_electrons,
305
            BO_valences,
306
            BO_matrix,
307
            mol_charge)
308
    else:
309
        mol = set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences)
310

    
311
    return mol
312

    
313

    
314
def set_atomic_charges(mol, atoms, atomic_valence_electrons,
315
                       BO_valences, BO_matrix, mol_charge):
316
    """
317
    """
318
    q = 0
319
    for i, atom in enumerate(atoms):
320
        a = mol.GetAtomWithIdx(i)
321
        charge = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])
322
        q += charge
323
        if atom == 6:
324
            number_of_single_bonds_to_C = list(BO_matrix[i, :]).count(1)
325
            if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
326
                q += 1
327
                charge = 0
328
            if number_of_single_bonds_to_C == 3 and q + 1 < mol_charge:
329
                q += 2
330
                charge = 1
331

    
332
        if (abs(charge) > 0):
333
            a.SetFormalCharge(int(charge))
334

    
335
    mol = clean_charges(mol)
336

    
337
    return mol
338

    
339

    
340
def set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences):
341
    """
342

343
    The number of radical electrons = absolute atomic charge
344

345
    """
346
    for i, atom in enumerate(atoms):
347
        a = mol.GetAtomWithIdx(i)
348
        charge = get_atomic_charge(
349
            atom,
350
            atomic_valence_electrons[atom],
351
            BO_valences[i])
352

    
353
        if (abs(charge) > 0):
354
            a.SetNumRadicalElectrons(abs(int(charge)))
355

    
356
    return mol
357

    
358

    
359
def get_bonds(UA, AC):
360
    """
361

362
    """
363
    bonds = []
364

    
365
    for k, i in enumerate(UA):
366
        for j in UA[k + 1:]:
367
            if AC[i, j] == 1:
368
                bonds.append(tuple(sorted([i, j])))
369

    
370
    return bonds
371

    
372

    
373
def get_UA_pairs(UA, AC, use_graph=True):
374
    """
375

376
    """
377

    
378
    bonds = get_bonds(UA, AC)
379

    
380
    if len(bonds) == 0:
381
        return [()]
382

    
383
    if use_graph:
384
        G = nx.Graph()
385
        G.add_edges_from(bonds)
386
        UA_pairs = [list(nx.max_weight_matching(G))]
387
        return UA_pairs
388

    
389
    max_atoms_in_combo = 0
390
    UA_pairs = [()]
391
    for combo in list(itertools.combinations(bonds, int(len(UA) / 2))):
392
        flat_list = [item for sublist in combo for item in sublist]
393
        atoms_in_combo = len(set(flat_list))
394
        if atoms_in_combo > max_atoms_in_combo:
395
            max_atoms_in_combo = atoms_in_combo
396
            UA_pairs = [combo]
397

    
398
        elif atoms_in_combo == max_atoms_in_combo:
399
            UA_pairs.append(combo)
400

    
401
    return UA_pairs
402

    
403

    
404
def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
405
    """
406

407
    implemenation of algorithm shown in Figure 2
408

409
    UA: unsaturated atoms
410

411
    DU: degree of unsaturation (u matrix in Figure)
412

413
    best_BO: Bcurr in Figure
414

415
    """
416

    
417
    global atomic_valence
418
    global atomic_valence_electrons
419

    
420
    # make a list of valences, e.g. for CO: [[4],[2,1]]
421
    valences_list_of_lists = []
422
    AC_valence = list(AC.sum(axis=1))
423
    for atomicNum in atoms:
424
        valences_list_of_lists.append(atomic_valence[atomicNum])
425

    
426
    # convert [[4],[2,1]] to [[4,2],[4,1]]
427
    valences_list = itertools.product(*valences_list_of_lists)
428

    
429
    best_BO = AC.copy()
430

    
431
    for valences in valences_list:
432

    
433
        UA, DU_from_AC = get_UA(valences, AC_valence)
434

    
435
        check_len = (len(UA) == 0)
436
        if check_len:
437
            check_bo = BO_is_OK(AC, AC, charge, DU_from_AC,
438
                atomic_valence_electrons, atoms, valences,
439
                allow_charged_fragments=allow_charged_fragments)
440
        else:
441
            check_bo = None
442

    
443
        if check_len and check_bo:
444
            return AC, atomic_valence_electrons
445

    
446
        UA_pairs_list = get_UA_pairs(UA, AC, use_graph=use_graph)
447
        for UA_pairs in UA_pairs_list:
448
            BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph)
449
            status = BO_is_OK(BO, AC, charge, DU_from_AC,
450
                        atomic_valence_electrons, atoms, valences,
451
                        allow_charged_fragments=allow_charged_fragments)
452

    
453
            if status:
454
                return BO, atomic_valence_electrons
455

    
456
            elif BO.sum() >= best_BO.sum() and valences_not_too_large(BO, valences):
457
                best_BO = BO.copy()
458

    
459
    return best_BO, atomic_valence_electrons
460

    
461

    
462
def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
463
    """
464
    """
465

    
466
    # convert AC matrix to bond order (BO) matrix
467
    BO, atomic_valence_electrons = AC2BO(
468
        AC,
469
        atoms,
470
        charge,
471
        allow_charged_fragments=allow_charged_fragments,
472
        use_graph=use_graph)
473

    
474
    # add BO connectivity and charge info to mol object
475
    mol = BO2mol(
476
        mol,
477
        BO,
478
        atoms,
479
        atomic_valence_electrons,
480
        charge,
481
        allow_charged_fragments=allow_charged_fragments)
482

    
483
    return mol
484

    
485

    
486
def get_proto_mol(atoms):
487
    """
488
    """
489
    mol = Chem.MolFromSmarts("[#" + str(atoms[0]) + "]")
490
    rwMol = Chem.RWMol(mol)
491
    for i in range(1, len(atoms)):
492
        a = Chem.Atom(atoms[i])
493
        rwMol.AddAtom(a)
494

    
495
    mol = rwMol.GetMol()
496

    
497
    return mol
498

    
499

    
500
def read_xyz_file(filename, look_for_charge=True):
501
    """
502
    """
503

    
504
    atomic_symbols = []
505
    xyz_coordinates = []
506
    charge = 0
507
    title = ""
508

    
509
    with open(filename, "r") as file:
510
        for line_number, line in enumerate(file):
511
            if line_number == 0:
512
                num_atoms = int(line)
513
            elif line_number == 1:
514
                title = line
515
                if "charge=" in line: #TODO: More flexible charge declaration
516
                    charge = int(line.split("=")[1])
517
            else:
518
                atomic_symbol, x, y, z = line.split()
519
                atomic_symbols.append(atomic_symbol)
520
                xyz_coordinates.append([float(x), float(y), float(z)])
521

    
522
    atoms = [int_atom(atom) for atom in atomic_symbols]
523

    
524
    return atoms, charge, xyz_coordinates
525

    
526

    
527
def xyz2AC(atoms, xyz, charge, use_huckel=False):
528
    """
529

530
    atoms and coordinates to atom connectivity (AC)
531

532
    args:
533
        atoms - int atom types
534
        xyz - coordinates
535
        charge - molecule charge
536

537
    optional:
538
        use_huckel - Use Huckel method for atom connecitivty
539

540
    returns
541
        ac - atom connectivity matrix
542
        mol - rdkit molecule
543

544
    """
545

    
546
    if use_huckel:
547
        return xyz2AC_huckel(atoms, xyz, charge)
548
    else:
549
        return xyz2AC_vdW(atoms, xyz)
550

    
551

    
552
def xyz2AC_vdW(atoms, xyz):
553

    
554
    # Get mol template
555
    mol = get_proto_mol(atoms)
556

    
557
    # Set coordinates
558
    conf = Chem.Conformer(mol.GetNumAtoms())
559
    for i in range(mol.GetNumAtoms()):
560
        conf.SetAtomPosition(i, (xyz[i][0], xyz[i][1], xyz[i][2]))
561
    mol.AddConformer(conf)
562

    
563
    AC = get_AC(mol)
564

    
565
    return AC, mol
566

    
567

    
568
def get_AC(mol, covalent_factor=1.3):
569
    """
570

571
    Generate adjacent matrix from atoms and coordinates.
572

573
    AC is a (num_atoms, num_atoms) matrix with 1 being covalent bond and 0 is not
574

575

576
    covalent_factor - 1.3 is an arbitrary factor
577

578
    args:
579
        mol - rdkit molobj with 3D conformer
580

581
    optional
582
        covalent_factor - increase covalent bond length threshold with facto
583

584
    returns:
585
        AC - adjacent matrix
586

587
    """
588

    
589
    # Calculate distance matrix
590
    dMat = Chem.Get3DDistanceMatrix(mol)
591

    
592
    pt = Chem.GetPeriodicTable()
593
    num_atoms = mol.GetNumAtoms()
594
    AC = np.zeros((num_atoms, num_atoms), dtype=int)
595

    
596
    for i in range(num_atoms):
597
        a_i = mol.GetAtomWithIdx(i)
598
        Rcov_i = pt.GetRcovalent(a_i.GetAtomicNum()) * covalent_factor
599
        for j in range(i + 1, num_atoms):
600
            a_j = mol.GetAtomWithIdx(j)
601
            Rcov_j = pt.GetRcovalent(a_j.GetAtomicNum()) * covalent_factor
602
            if dMat[i, j] <= Rcov_i + Rcov_j:
603
                AC[i, j] = 1
604
                AC[j, i] = 1
605

    
606
    return AC
607

    
608

    
609
def xyz2AC_huckel(atomicNumList,xyz,charge):
610
    """
611

612
    args
613
        atomicNumList - atom type list
614
        xyz - coordinates
615
        charge - molecule charge
616

617
    returns
618
        ac - atom connectivity
619
        mol - rdkit molecule
620

621
    """
622
    mol = get_proto_mol(atomicNumList)
623

    
624
    conf = Chem.Conformer(mol.GetNumAtoms())
625
    for i in range(mol.GetNumAtoms()):
626
        conf.SetAtomPosition(i,(xyz[i][0],xyz[i][1],xyz[i][2]))
627
    mol.AddConformer(conf)
628

    
629
    num_atoms = len(atomicNumList)
630
    AC = np.zeros((num_atoms,num_atoms)).astype(int)
631

    
632
    mol_huckel = Chem.Mol(mol)
633
    mol_huckel.GetAtomWithIdx(0).SetFormalCharge(charge) #mol charge arbitrarily added to 1st atom    
634

    
635
    passed,result = rdEHTTools.RunMol(mol_huckel)
636
    opop = result.GetReducedOverlapPopulationMatrix()
637
    tri = np.zeros((num_atoms, num_atoms))
638
    tri[np.tril(np.ones((num_atoms, num_atoms), dtype=bool))] = opop #lower triangular to square matrix
639
    for i in range(num_atoms):
640
        for j in range(i+1,num_atoms):
641
            pair_pop = abs(tri[j,i])   
642
            if pair_pop >= 0.15: #arbitry cutoff for bond. May need adjustment
643
                AC[i,j] = 1
644
                AC[j,i] = 1
645

    
646
    return AC, mol
647

    
648

    
649
def chiral_stereo_check(mol):
650
    """
651
    Find and embed chiral information into the model based on the coordinates
652

653
    args:
654
        mol - rdkit molecule, with embeded conformer
655

656
    """
657
    Chem.SanitizeMol(mol)
658
    Chem.DetectBondStereochemistry(mol, -1)
659
    Chem.AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True)
660
    Chem.AssignAtomChiralTagsFromStructure(mol, -1)
661

    
662
    return
663

    
664

    
665
def xyz2mol(atoms, coordinates,
666
    charge=0,
667
    allow_charged_fragments=True,
668
    use_graph=True,
669
    use_huckel=False,
670
    embed_chiral=True):
671
    """
672
    Generate a rdkit molobj from atoms, coordinates and a total_charge.
673

674
    args:
675
        atoms - list of atom types (int)
676
        coordinates - 3xN Cartesian coordinates
677
        charge - total charge of the system (default: 0)
678

679
    optional:
680
        allow_charged_fragments - alternatively radicals are made
681
        use_graph - use graph (networkx)
682
        use_huckel - Use Huckel method for atom connectivity prediction
683
        embed_chiral - embed chiral information to the molecule
684

685
    returns:
686
        mol - rdkit molobj
687

688
    """
689

    
690
    # Get atom connectivity (AC) matrix, list of atomic numbers, molecular charge,
691
    # and mol object with no connectivity information
692
    print("atoms variable: ", atoms)
693
    AC, mol = xyz2AC(atoms, coordinates, charge, use_huckel=use_huckel)
694

    
695
    # Convert AC to bond order matrix and add connectivity and charge info to
696
    # mol object
697
    new_mol = AC2mol(mol, AC, atoms, charge,
698
        allow_charged_fragments=allow_charged_fragments,
699
        use_graph=use_graph)
700

    
701
    # Check for stereocenters and chiral centers
702
    if embed_chiral:
703
        chiral_stereo_check(new_mol)
704

    
705
    return new_mol
706

    
707

    
708
def main():
709

    
710

    
711
    return
712

    
713

    
714
if __name__ == "__main__":
715

    
716
    import argparse
717

    
718
    parser = argparse.ArgumentParser() # usage='%(prog)s [options] molecule.xyz')
719
    parser.add_argument('structure', metavar='structure', type=str)
720
    parser.add_argument('-s', '--sdf',
721
        action="store_true",
722
        help="Dump sdf file")
723
    parser.add_argument('--ignore-chiral',
724
        action="store_true",
725
        help="Ignore chiral centers")
726
    parser.add_argument('--no-charged-fragments',
727
        action="store_true",
728
        help="Allow radicals to be made")
729
    parser.add_argument('--no-graph',
730
        action="store_true",
731
        help="Run xyz2mol without networkx dependencies")
732

    
733
    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
734
    # otherwise van der Waals radii are used
735
    parser.add_argument('--use-huckel',
736
        action="store_true",
737
        help="Use Huckel method for atom connectivity")
738
    parser.add_argument('-o', '--output-format',
739
        action="store",
740
        type=str,
741
        help="Output format [smiles,sdf] (default=sdf)")
742
    parser.add_argument('-c', '--charge',
743
        action="store",
744
        metavar="int",
745
        type=int,
746
        help="Total charge of the system")
747

    
748
    args = parser.parse_args()
749

    
750
    # read xyz file
751
    filename = args.structure
752

    
753
    # allow for charged fragments, alternatively radicals are made
754
    charged_fragments = not args.no_charged_fragments
755

    
756
    # quick is faster for large systems but requires networkx
757
    # if you don't want to install networkx set quick=False and
758
    # uncomment 'import networkx as nx' at the top of the file
759
    quick = not args.no_graph
760

    
761
    # chiral comment
762
    embed_chiral = not args.ignore_chiral
763

    
764
    # read atoms and coordinates. Try to find the charge
765
    atoms, charge, xyz_coordinates = read_xyz_file(filename)
766

    
767
    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
768
    # otherwise van der Waals radii are used
769
    use_huckel = args.use_huckel
770

    
771
    # if explicit charge from args, set it
772
    if args.charge is not None:
773
        charge = int(args.charge)
774

    
775
    # Get the molobj
776
    mol = xyz2mol(atoms, xyz_coordinates,
777
        charge=charge,
778
        use_graph=quick,
779
        allow_charged_fragments=charged_fragments,
780
        embed_chiral=embed_chiral,
781
        use_huckel=use_huckel)
782

    
783
    # Print output
784
    if args.output_format == "sdf":
785
        txt = Chem.MolToMolBlock(mol)
786
        print(txt)
787

    
788
    else:
789
        # Canonical hack
790
        isomeric_smiles = not args.ignore_chiral
791
        smiles = Chem.MolToSmiles(mol, isomericSmiles=isomeric_smiles)
792
        m = Chem.MolFromSmiles(smiles)
793
        smiles = Chem.MolToSmiles(m, isomericSmiles=isomeric_smiles)
794
        print(smiles)