Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / ASANN.py @ a44ad3c2

Historique | Voir | Annoter | Télécharger (41,34 ko)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3

    
4
"""Python3 implementation of the ASANN algorithm (Anisotropically corrected
5
Solid-Angle based Nearest-Neighbors) """
6

    
7
# EXTERNAL MODULES IMPORTS ###
8
import numpy as np
9
import math
10

    
11

    
12
# FUNCTIONS DEFINITION ###
13

    
14
# PRE-PROCESSING FUNCTIONS ##
15
def add_periodic_images(coords, cell, mode):
16
    """Explicitely add adjacent or surrounding periodic images.
17

18
    Parameters: coords (2D array): List of atoms coordinates to consider.
19
    Important: direct coordinates are expected (torus topology of side 1) if
20
    `pbc` is set to True. Shape expected: (nb_atoms, nb_dim), where nb_atoms
21
    is the number of atoms considered, and nb_dim is the dimensionnality of
22
    the system.
23

24
        cell (numpy 2D array): List of cell vectors to consider for periodic
25
        boundaries conditions. Shape expected: (nb_dim, nb_dim), where nb_dim
26
        is the dimensionnality of the system considered. Exemple:
27
        cell_vectors=[[v1_x, v1_y, v1_z], [v2_x, v2_y, v2_z], [v3_x, v3_y,
28
        v3_z]]
29

30
        mode (str): Determines which periodic images should be included. If
31
        'adjacent', all adjacent periodic images are included (all periodic
32
        images sharing a face), increasing the number of coordinates 9-fold.
33
        If 'full', all surrounding periodic images are included (all periodic
34
        images sharing a face, an edge or a point), increasing the number of
35
        coordinates 27-fold.
36

37
    Returns: (nb_atoms, new_coords) nb_atoms: number of real atom coordinates
38
    new_coords: numpy 2D array containing updated coordinates (initial +
39
    periodic images) Shape: (nb_coords, nb_dim)
40
    """
41
    # Initialize new coordinates
42
    new_coords = np.mod(coords, 1)  # Project coords inside initial cell
43

    
44
    # Iterate over cell vectors
45
    for vect in np.eye(cell.shape[0]):
46
        # Add periodic images
47
        if mode == 'adjacent':
48
            new_coords = np.vstack((new_coords, coords + vect, coords - vect))
49
        elif mode == 'full':
50
            new_coords = np.vstack((new_coords, new_coords + vect,
51
                                    new_coords - vect))  # Recursive process
52
            # to include all dimension combinaisons
53
        else:
54
            raise NotImplementedError
55

    
56
    return coords.shape[0], new_coords
57

    
58

    
59
def get_pbc_vectors(coords, pbc, nb_atoms=None):
60
    """Compute pairwise vectors with or without periodic boundaries conditions.
61

62
    Parameters:
63
        coords (numpy 2D array): List of atoms coordinates to
64
        consider. Important: direct coordinates are expected (torus topology of
65
        side 1) if `pbc` is set to True. Shape expected: (nb_coords, nb_dim),
66
        where nb_coords is the number of coordinates considered, and nb_dim is
67
        the dimensionnality of the system.
68

69
        pbc (bool): Determines if periodic boundaries conditions should be
70
        applied. Default to True. If True, coordinates are interpreted as
71
        direct coordinates and the distance between points is computed as the
72
        minimum euclidian distance between all duplicates (due to periodic
73
        boundaries conditions) of these points. If false, coordinates are
74
        interpreted as cartesian coordinates and the metric used is simply
75
        the euclidian distance. Note: the minimum image convention is not
76
        applied if periodic images are already explicitely included.
77

78
        nb_atoms (int, optional): Number of real atoms coordinates (i.e. for
79
        which distances must be computed). This is particularly useful for
80
        excluding periodic images coordinates as central atoms. The real
81
        atoms coordinates are supposed to be the first coordinates in `coords`
82

83
    Returns: numpy 2D array containing pairwise vectors from `coords`.
84
    Important: if coords are direct coordinates (i.e. `pbc` is set to True),
85
    the vectors are in direct coordinates. If coords are in cartesian
86
    coordinates (i.e. `pbc` is set to False), the vectors are in cartesian
87
    coordinates. vectors[i,j] = (v_ij_x, v_ij_y, v_ij_z) = (r_j_x - r_i_x,
88
    r_j_y - r_i_y, r_j_z - r_i_z) Shape : (nb_atoms, nb_coords, nb_dim)
89
    """
90
    # Retrieve number of non-virtual atoms (from which distances are computed)
91
    if nb_atoms is None:
92
        nb_atoms = coords.shape[0]
93

    
94
    # Compute cartesian vectors
95
    vectors = coords[np.newaxis, :, :] - coords[:nb_atoms, np.newaxis, :]
96

    
97
    # Applying PBC (if minimum image convention is required)
98
    if pbc and pbc not in ('adjacent', 'full'):
99
        # vectors = np.mod(vectors + 0.5, 1) - 0.5 # modulo operation is a
100
        # bit slower than floor operation...
101
        vectors -= np.floor(vectors + 0.5)
102

    
103
    return vectors
104

    
105

    
106
def get_sorted_distances(coords, pbc, nb_atoms=None, cell=np.eye(3)):
107
    """Compute distances-sorted pairwise distances and vectors with or
108
    without periodic boundaries conditions.
109

110
    Parameters: coords (numpy 2D array): List of atoms coordinates to
111
    consider. Important: direct coordinates are expected (torus topology of
112
    side 1) if `pbc` is set to True. Shape expected: (nb_atoms, nb_dim),
113
    where nb_atoms is the number of atoms considered, and nb_dim is the
114
    dimensionnality of the system.
115

116
        pbc (bool): Determines if periodic boundaries conditions should be
117
        applied. Default to True. If True, coordinates are interpreted as
118
        direct coordinates and the distance between points is computed as the
119
        minimum euclidian distance between all duplicates (due to periodic
120
        boundaries conditions) of these points. If false, coordinates are
121
        interpreted as cartesian coordinates and the metric used is simply
122
        the euclidian distance.
123

124
        nb_atoms (int, optional): Number of real atoms coordinates (i.e. for
125
        which distances must be computed). This is particularly useful for
126
        excluding periodic images coordinates as central atoms. If None,
127
        all `coords` coordinates are considered. Default: None
128

129
        cell (2D array, optional): List of cell vectors to consider when
130
        periodic boundaries conditions are considered. Shape expected: (
131
        nb_dim, nb_dim), where nb_dim is the dimensionnality of the system
132
        considered. Important: For this parameter to be taken into account,
133
        `pbc` must be set to True. Exemple: cell_vectors=[[v1_x, v1_y, v1_z],
134
        [v2_x, v2_y, v2_z], [v3_x, v3_y, v3_z]] Default: numpy.eye(3) (i.e. a
135
        cubic cell of side 1).
136

137
    Returns: (sorted_distances, sorted_vectors, sorted_indexes)
138
    sorted_vectors: numpy 3D array containing pairwise vectors from `coords`,
139
    where each i-th row is sorted by increasing distance with respect to the
140
    i-th coordinates. Important: The vectors are in cartesian coordinates.
141
    sorted_vectors[i,j] = (v_ij_x, v_ij_y, v_ij_z) = (r_j_x - r_i_x, r_j_y -
142
    r_i_y, r_j_z - r_i_z) Shape : (nb_atoms, nb_coords, nb_dim)
143
    sorted_distances: numpy 2D array containing pairwise distances from
144
    `coords`, where each i-th row is sorted by increasing distance with
145
    respect to the i-th coordinates. Important: Cartesian euclidian distance
146
    is used here. sorted_distances[i,j-1] <= sorted_distances[i,
147
    j] <= sorted_distances[i,j+1] Shape : (nb_atoms, nb_coords)
148
    sorted_indexes: numpy 2D array containing for each row the
149
    distances-sorted indexes of neighbors. In other words, the atom indexed
150
    sorted_indexes[i,j] is the j-th nearest neighbor of atom i. Shape : (
151
    nb_atoms, nb_coords)
152
    """
153
    # Retrieve number of atoms if not given
154
    if nb_atoms is None:
155
        nb_atoms = coords.shape[0]
156

    
157
    # Computes pairwise vectors
158
    vectors = get_pbc_vectors(coords, pbc, nb_atoms=nb_atoms)  # vector.shape =
159
    # (nb_atoms, nb_coords, nb_dim)
160

    
161
    # Convert into cartesian coordinates if pbc
162
    if pbc:
163
        vectors = np.dot(vectors, cell)  # dot product with cell vectors to
164
        # have cartesian coordinates (for distance computation)
165

    
166
    # Computes pairwise distances
167
    distances = np.sqrt(np.sum(vectors ** 2, axis=-1))  # simply the square
168
    # root of the sum of squared components for each pairwise vector
169

    
170
    # Sorting vectors and distances (with respect to distance) for improved
171
    # performance of the (CA)SANN algorithm Getting sorted indexes to apply to
172
    # distances and vectors
173
    sorted_index_axis1 = np.argsort(distances, axis=-1)  # sorting columns
174
    sorted_index_axis0 = np.arange(nb_atoms)[:, None]  # keeping rows
175
    # Rearranging distances and vectors so that each row is sorted by
176
    # increasing distance. (i.e. distances[i, j-1] <= distances[i,
177
    # j] <= distances[i, j+1])
178
    distances = distances[sorted_index_axis0, sorted_index_axis1]
179
    vectors = vectors[sorted_index_axis0, sorted_index_axis1]
180

    
181
    return distances, vectors, sorted_index_axis1
182

    
183

    
184
# SANN IMPLEMENTATION ##
185
def get_SANN(all_distances):
186
    """Computes coordination numbers according to the SANN algorithm,
187
    from all pairwise distances.
188

189
    Parameters: all_distances: numpy 2D array containing pairwise distances
190
    from `coords`, where each i-th row is sorted by increasing distance with
191
    respect to the i-th coordinates. Important: Cartesian euclidian distance
192
    is used here. sorted_distances[i,j-1] <= sorted_distances[i,
193
    j] <= sorted_distances[i,j+1] Shape : (nb_atoms, nb_coords)
194

195
    Returns: list_sann_CN : Numpy 1D array containing SANN-based coordination
196
    numbers (i.e. number of neighbors).
197

198
        list_sann_radius : Numpy 1D array containing SANN-based coordination
199
        radius (i.e. coordination sphere radius).
200
    """
201

    
202
    # Retrieve number of atoms
203
    nb_coords = all_distances.shape[1]
204

    
205
    # Initialize coordination number vector (i-th element is the coordination
206
    # number of the i-th atom) and coordination radius
207
    list_sann_CN = list()
208
    list_sann_radius = list()
209
    # Initialize sum of distances vector (i-th element is meant to temporarly
210
    # store the sum of the i-th atom's 3 nearest neighbors distances)
211
    list_dist_sum = all_distances[:, 1:4].sum(axis=1)
212

    
213
    # Treat each atom separately (since CN can be different for each atom,
214
    # Numpy methods are unsuited here)
215
    for (dist_sum, atom_distances) in zip(list_dist_sum, all_distances):
216
        sann_CN = 3  # Set CN to 3 (i.e. the minimum CN value for the SANN
217
        # algorithm) SANN algorithm (i.e. while SANN radius sum(r_ij,1,
218
        # m)/(m-2) > r_i(m+1), increase m by 1)
219
        while (sann_CN + 1 < nb_coords) and (
220
                dist_sum / (sann_CN - 2) >= atom_distances[sann_CN + 1]):
221
            dist_sum += atom_distances[sann_CN + 1]
222
            sann_CN += 1
223
        # Store SANN CN found
224
        list_sann_CN.append(sann_CN)
225
        list_sann_radius.append(dist_sum / (sann_CN - 2))
226

    
227
    return (np.array(list_sann_CN), np.array(list_sann_radius))
228

    
229

    
230
## ANISOTROPY HANDLING ##
231
def dist_to_barycenter(nearest_neighbors, nearest_distances, radius):
232
    """Compute the distance from the central atom to the barycenter of
233
    nearest neighbors.
234

235
    Parameters: nearest_neighbors: numpy 2D array containing nearest
236
    neighbors vectors from the central atom, sorted by increasing distance
237
    with respect to the central atom. Important: The vectors must be in
238
    cartesian coordinates.
239

240
        nearest_distances: numpy 1D array containing nearest neighbors
241
        distances from the central atom, sorted by increasing distance with
242
        respect to the central atom.
243

244
        radius (float): radius R_i_m of the sphere of coordination.
245

246
    Returns: dist_bary (float): distance from the central atom to the
247
    barycenter of nearest neighbors, weighted by relative solid angles
248

249
    Computational details: The barycenter is computed using a solid angle
250
    weight (i.e. the solid angle associated with the corresponding neighbor).
251
    """
252

    
253
    # Compute solid angles (SA) of neighbors
254
    list_SA = 1 - nearest_distances / radius
255

    
256
    # Compute SA-based barycenter
257
    bary_vector = np.sum(nearest_neighbors * list_SA[:, np.newaxis],
258
                         axis=0) / np.sum(list_SA)
259

    
260
    # Returns distance from the central atom to the barycenter
261
    return (math.sqrt(np.sum(bary_vector ** 2)), bary_vector)
262

    
263

    
264
def angular_correction(nearest_neighbors, nearest_distances, radius):
265
    """Compute the angular correction `ang_corr`, such that R_i_m = sum(r_ij,
266
    j=1..m)/(m-2(1-ang_corr)).
267

268
    Parameters: nearest_neighbors: numpy 2D array containing the m nearest
269
    neighbors vectors from the central atom, sorted by increasing distance
270
    with respect to the central atom. Important: The vectors must be in
271
    cartesian coordinates.
272

273
        nearest_distances: numpy 1D array containing the m nearest neighbors
274
        distances from the central atom, sorted by increasing distance with
275
        respect to the central atom.
276

277
        radius (float): radius R_i_m of the sphere of coordination.
278

279
    Returns:
280
        ang_corr (float): angular correction.
281

282
    Computational details: The angular correction is computed from the
283
    distance between the nearest neighbor barycenter and the central atom
284
    `dist_bary`.
285

286
        Let us define `alpha` such that: dist_bary = alpha * radius Then,
287
        mathematical derivations show that: ang_corr = (alpha + sqrt(alpha**2
288
        + 3*alpha))/3
289
    """
290

    
291
    # Computing the ratio between the distance to the nearest neighbors
292
        # barycenter and the radius
293
    alpha = dist_to_barycenter(nearest_neighbors, nearest_distances, radius)[
294
                0] / radius
295
    vector = dist_to_barycenter(nearest_neighbors, nearest_distances, radius)[1]
296

    
297
    # Computing angular correction
298
    return ((alpha + math.sqrt(alpha ** 2 + 3 * alpha)) / 3, vector)
299

    
300

    
301
## ASANN IMPLEMENTATION ##
302
def get_ASANN(sorted_distances, sorted_vectors, sann_CNs, sann_radii):
303
    """Update ASANN-based coordination numbers using an angular correction term.
304

305
    Parameters: sorted_vectors: numpy 3D array containing pairwise vectors
306
    from `coords`, where each i-th row is sorted by increasing distance with
307
    respect to the i-th coordinates. Important: The vectors must be in
308
    cartesian coordinates. sorted_vectors[i,j] = (v_ij_x, v_ij_y, v_ij_z) = (
309
    r_j_x - r_i_x, r_j_y - r_i_y, r_j_z - r_i_z) Shape : (nb_atoms,
310
    nb_coords, nb_dim)
311

312
        sorted_distances: numpy 2D array containing pairwise distances from
313
        `coords`, where each i-th row is sorted by increasing distance with
314
        respect to the i-th coordinates. Important: Cartesian euclidian
315
        distance is used here. sorted_distances[i,j-1] <= sorted_distances[i,
316
        j] <= sorted_distances[i,j+1] Shape : (nb_atoms, nb_coords)
317

318
        sann_CNs: Numpy 1D array containing SANN-based coordination numbers (
319
        i.e. number of neighbors).
320

321
        sann_radii: Numpy 1D array containing SANN-based coordination radius
322
        (i.e. radius of the coordination spheres).
323

324
    Returns: list_asann_CN : Numpy 1D array containing ASANN-based
325
    coordination numbers (i.e. number of neighbors, with an angular
326
    correction term).
327

328
        list_asann_radius : Numpy 1D array containing ASANN-based
329
        coordination radius (i.e. coordination sphere radius).
330

331
    Computational details: ASANN-based coordination number is defined as the
332
    maximum coordination number m' such that forall m>=m', R_ang_i_m = sum(
333
    r_ij, j=1..m)/(m-2(1-ang_corr)) < r_i(m+1)
334

335
        It is easy to show that R_ang_i_m <= R_i_m, and therefore, m'<=m.
336
        Consequently, the ASANN algorithm is sure to converge.
337

338
        Unlike SC-ASANN algorithm, the angular correction is solely computed
339
        using the SANN radius (instead of a self-coherent approach, where the
340
        angular term is defined by (and defines) the ASANN radius itself)
341
    """
342

    
343
    # Retrieve number of atoms
344
    nb_coords = sorted_distances.shape[1]
345

    
346
    # Create coordination number vector (i-th element is the coordination
347
    # number of the i-th atom) and coordination radius
348
    list_asann_CN = list()
349
    list_asann_radius = list()
350
    list_bary_vector = list()
351

    
352
    # Treat each atom separately (since CN can be different for each atom,
353
    # Numpy methods are unsuited here)
354
    for (atom_distances, atom_neighbors, sann_CN, sann_radius) in zip(
355
            sorted_distances, sorted_vectors, sann_CNs, sann_radii):
356

    
357
        # Computes angular correction
358
        nearest_distances = atom_distances[1:sann_CN + 1]
359
        nearest_neighbors = atom_neighbors[1:sann_CN + 1]
360
        ang_corr, vec = angular_correction(nearest_neighbors, nearest_distances,
361
                                           sann_radius)
362
        beta = 2 * (1 - ang_corr)
363

    
364
        # ASANN algorithm (i.e. while ASANN radius sum(r_ij, j=1..m)/(m-2*(
365
        # 1-ang_corr)) >= r_i(m+1), increase m by 1)
366
        asann_CN = int(
367
            beta) + 1  # Set CN to floor(2*(1-ang_corr)) + 1 (i.e. the
368
        # minimum CN value for the ASANN algorithm)
369
        dist_sum = atom_distances[
370
                   1:asann_CN + 1].sum()  # Initialize sum of distances
371
        while (asann_CN + 1 < nb_coords) and (
372
                dist_sum / (asann_CN - beta) >= atom_distances[asann_CN + 1]):
373
            dist_sum += atom_distances[asann_CN + 1]
374
            asann_CN += 1
375

    
376
        # Store ASANN CN and radius found
377
        list_asann_CN.append(asann_CN)
378
        list_asann_radius.append(dist_sum / (asann_CN - beta))
379
        list_bary_vector.append(vec)
380

    
381
    return (np.array(list_asann_CN), np.array(list_asann_radius),
382
            np.array(list_bary_vector))
383

    
384

    
385
# VARIANTS DEFINITIONS ##
386
def get_self_consistent_ASANN(sorted_distances, sorted_vectors, sann_CNs,
387
                              radius_eps=1e-2):
388
    """Update ASANN-based coordination numbers using an angular correction
389
    term computed in a self-consistent manner.
390

391
    Parameters: sorted_vectors: numpy 3D array containing pairwise vectors
392
    from `coords`, where each i-th row is sorted by increasing distance with
393
    respect to the i-th coordinates. Important: The vectors must be in
394
    cartesian coordinates. sorted_vectors[i,j] = (v_ij_x, v_ij_y, v_ij_z) = (
395
    r_j_x - r_i_x, r_j_y - r_i_y, r_j_z - r_i_z) Shape: (nb_atoms, nb_atoms,
396
    nb_dim)
397

398
        sorted_distances: numpy 2D array containing pairwise distances from
399
        `coords`, where each i-th row is sorted by increasing distance with
400
        respect to the i-th coordinates. Important: Cartesian euclidian
401
        distance is used here. sorted_distances[i,j-1] <= sorted_distances[i,
402
        j] <= sorted_distances[i,j+1] Shape: (nb_atoms, nb_atoms)
403

404
        sann_CNs: Numpy 1D array containing SANN-based coordination numbers (
405
        i.e. number of neighbors).
406

407
        radius_eps: Convergence threshold used for stopping the
408
        self-consistent radius computation. Default: 1e-2
409

410
    Returns: list_asann_CN : Numpy 1D array containing ASANN-based
411
    coordination numbers (i.e. number of neighbors, with an angular
412
    correction term).
413

414
        list_asann_radius : Numpy 1D array containing ASANN-based
415
        coordination radius (i.e. coordination sphere radius).
416

417
    Computational details: SC-ASANN-based coordination number is defined as
418
    the maximum coordination number m' such that forall m>=m', R_ang_i_m =
419
    sum(r_ij, j=1..m)/(m-2(1-ang_corr)) < r_i(m+1)
420

421
        Note that the angular correction term is computed here using the ASANN radius (defined itself from the angular correction term). In this approach, the angular correction term is computed in a self-consistent fashion.
422
    """
423

    
424
    # Create coordination number vector (i-th element is the coordination
425
    # number of the i-th atom) and coordination radius
426
    list_asann_CN = list()
427
    list_asann_radius = list()
428
    list_vectors = list()
429

    
430
    # Treat each atom separately (since CN can be different for each atom,
431
    # Numpy methods are unsuited here)
432
    for (atom_distances, atom_neighbors, sann_CN) in zip(sorted_distances,
433
                                                         sorted_vectors,
434
                                                         sann_CNs):
435
        asann_CN = sann_CN  # Set initial CN to 1 above the maximum CN that
436
        # can break ASANN relation (i.e. sann_CN-1)
437
        radius = 0
438
        prev_radius = 0
439

    
440
        # ASANN update algorithm (i.e. while ASANN radius sum(r_ij, j=1..m)/(
441
        # m-2(1-ang_corr)) < r_i(m+1), decrease m by 1)
442
        while True:
443
            # Extract properties of nearest neighbors
444
            nearest_distances = atom_distances[1:asann_CN + 1]
445
            nearest_neighbors = atom_neighbors[1:asann_CN + 1]
446

    
447
            # Computes radius iteratively
448
            sum_nearest = np.sum(
449
                nearest_distances)  # store sum of nearest distances
450
            radius = sum_nearest / (
451
                    asann_CN - 2)  # set initial radius to SANN value
452
            delta_radius = math.inf
453
            # Update radius until convergence
454
            while delta_radius > radius_eps:
455
                delta_radius = sum_nearest / (asann_CN - 2 * (
456
                        1 - angular_correction(nearest_neighbors,
457
                                               nearest_distances,
458
                                               radius)[0])) - radius  #
459
                # new_radius(old_radius) - old_radius
460
                vec = angular_correction(nearest_neighbors, nearest_distances,
461
                                         radius)[1]
462
                radius += delta_radius
463

    
464
            # Check if ASANN relation is broken
465
            if radius >= atom_distances[asann_CN + 1]:
466
                break
467
            asann_CN -= 1
468
            prev_radius = radius
469
            if asann_CN < 1:  # ASANN CN is not defined for less than 1
470
                # neighbor
471
                break
472

    
473
        # Store ASANN CN and radius found (before breaking the ASANN relation)
474
        list_asann_CN.append(asann_CN + 1)
475
        list_asann_radius.append(prev_radius)
476
        list_vectors.append(vec)
477

    
478
    return (np.array(list_asann_CN), np.array(list_asann_radius),
479
            np.array(list_vectors))
480

    
481

    
482
def convert_continuous(list_CN, list_radius, sorted_distances):
483
    """Convert integer coordination numbers into continuous decimal values.
484

485
    Parameters: list_CN: Numpy 1D array containing discretized coordination
486
    numbers (i.e. number of neighbors).
487

488
        list_radius: Numpy 1D array containing coordination radius (i.e.
489
        radius of the coordination spheres).
490

491
        sorted_distances: Numpy 2D array containing pairwise distances
492
        between coordinates, where each i-th row is sorted by increasing
493
        distance with respect to the i-th coordinates.
494

495
    Returns: list_continuous_CN: Numpy 1D array containing continuous
496
    coordination numbers.
497

498
        list_weights: 2D array containing the distance-related weights to
499
        apply to each neighbors of each atom.
500

501
    Computational details: In the discretized version, each neighbor is
502
    comptabilized exactly once. In the continuous version, the contribution
503
    of each neighbor is scaled by the following weight : SA/SA_max (where SA
504
    is the solid angle associated with the corresponding neighbor, and SA_max
505
    is the maximum solid angle (i.e. the solid angle associated with the
506
    nearest neighbor)).
507

508
        The continuous coordination number is then the sum of all weights.
509
    """
510

    
511
    # Create array to store continuous coordination numbers
512
    list_continuous_CN = list()
513
    list_weights = list()
514

    
515
    # Treat each atom separately (since CN can be different for each atom,
516
    # Numpy methods are unsuited here)
517
    for (atom_CN, atom_radius, atom_distances) in zip(list_CN, list_radius,
518
                                                      sorted_distances):
519
        # Extract distances of nearest neighbors
520
        nearest_distances = atom_distances[1:atom_CN + 1]
521

    
522
        # Compute solid angles of nearest neighbors
523
        nearest_SA = 1 - nearest_distances / atom_radius
524

    
525
        # Retrieve maximum solid angle (whose contribution will be 1)
526
        max_SA = nearest_SA[0]
527

    
528
        # Compute and store weights
529
        weights = nearest_SA / max_SA
530
        list_weights.append(weights)
531

    
532
        # Compute sum of contributions (i.e. continuous CN)
533
        continuous_CN = np.sum(weights)
534
        list_continuous_CN.append(continuous_CN)
535

    
536
    return np.array(list_continuous_CN), list_weights
537

    
538

    
539
def get_generalized(list_CN, list_weights, sorted_indexes, max_CN=None):
540
    """Convert coordination numbers into generalized coordination numbers.
541

542
    Parameters:
543
        list_CN: Numpy 1D array containing the coordination numbers.
544

545
        list_weights: 2D list containing the distance-related weights to
546
        apply to each neighbors of each atom. Note: In the case of
547
        discretized version, each weight is equal to 1.
548

549
        sorted_indexes: Numpy 2D array containing for each row the
550
        distances-sorted indexes of neighbors. In other words, the atom
551
        indexed sorted_indexes[i,j] is the j-th nearest neighbor of atom i.
552
        Shape = (nb_atoms, nb_coords)
553

554
        max_CN (int, optional): Value to use for the maximum coordination
555
        number, i.e. bulk coordination. If None, the maximum/bulk
556
        coordination number will be guessed. Default: None
557

558
    Returns: list_generalized_CN: Numpy 1D array containing generalized
559
    coordination numbers.
560

561
    Computational details: The generalized coordination number algorithm
562
    scales the contributions of each neighbor by the following weight :
563
    CN/CN_max (where CN is the coordination number associated with the
564
    corresponding neighbor, and CN_max is the maximum coordination achievable
565
    (i.e. bulk coordination)) The generalized coordination number is then the
566
    sum of all weights (GCN = sum(CN/max_CN, neighbors))
567

568
        This sum can be weighted futhermore (GCN = sum(CN/max_CN*SA/max_SA,
569
        neighbors)), using the continuous algorithm if requested (see `help(
570
        convert_continuous)` for more details on this algorithm).
571
    """
572

    
573
    # Create array to store generalized coordination numbers
574
    list_generalized_CN = list()
575

    
576
    # Retrieve maximal coordination number
577
    if max_CN is None:
578
        max_CN = max(list_CN)
579

    
580
    # Treat each atom separately (since CN can be different for each atom,
581
    # Numpy methods are unsuited here)
582
    for (weights, indexes) in zip(list_weights, sorted_indexes):
583
        # Initialize atom coordination number, and maximal coordination number
584
        atom_CN = 0
585

    
586
        # Loop over all neighbors, compute and add the corresponding weights
587
        for (weight, index) in zip(weights, indexes[1:]):
588
            try:
589
                neighbor_CN = list_CN[index]
590
            except IndexError:
591
                # if neighbor is virtual, use corresponding original one instead
592
                neighbor_CN = list_CN[index % sorted_indexes.shape[0]]
593
            atom_CN += weight * neighbor_CN
594

    
595
        # Divide by maximal coordination number
596
        list_generalized_CN.append(atom_CN / max_CN)
597

    
598
    return (np.array(list_generalized_CN))
599

    
600

    
601
def get_edges(list_CN, sorted_indexes, reduce_mode=None, nb_atoms=None):
602
    """Compute all edges corresponding with the connectivity graph of the
603
    given structure, based on discretized coordination numbers.
604

605
    Parameters: list_CN: Numpy 1D array containing the discretized
606
    coordination numbers (i.e. number of neighbors).
607

608
        sorted_indexes: Numpy 2D array containing for each row the
609
        distances-sorted indexes of neighbors. In other words, the atom
610
        indexed sorted_indexes[i,j] is the j-th nearest neighbor of atom i.
611
        Shape: (nb_atoms, nb_coords)
612

613
        reduce_mode: Edges counting mode. The ASANN/SANN algorithm can only
614
        find directed edges (i.e. find i->j but not j->i). This parameter
615
        defines the conversion mode from directed to undirected edges.
616
        Possible values: None: All directed edges are given, no reduction is
617
        performed. 'both': An undirected edge (i,j) is present only if both
618
        related directed edges (i->j and j->i) are found. 'any': An
619
        undirected edge (i,j) is present if any related directed edge (i->j
620
        or j->i) is found.
621

622
        nb_atoms (int, optional): Number of real atoms coordinates (i.e. for
623
        which distances must be computed). This is particularly useful for
624
        excluding periodic images coordinates as central atoms. If None,
625
        all coordinates are considered. Default: None
626

627
    Returns:
628
        list_edges: List containing all edges of the connectivity graph.
629
            The edges are in the form of a couple (index_node_i, index_node_j)
630
            Shape: (nb_bonds_found, 2)
631
    """
632

    
633
    # Create array to store all edges
634
    list_edges = list()
635

    
636
    # Treat each atom separately (since CN can be different for each atom,
637
    # Numpy methods are unsuited here)
638
    for (atom_CN, indexes) in zip(list_CN, sorted_indexes):
639
        # Retrieve current atom index
640
        index_i = indexes[0]
641
        # Loop over all neighbors, and add the corresponding edges
642
        for index_j in indexes[1:atom_CN + 1]:
643
            list_edges.append(
644
                (index_i, index_j) if reduce_mode is None else tuple(sorted((
645
                    index_i,
646
                    index_j))))  # add sorted edge instead (representing an
647
            # undirected edge) if conversion is required (reduce_mode not None)
648

    
649
    # Re-map to correct atom index if explicit periodic images are included
650
    if nb_atoms is not None:
651
        list_edges = [(index_i % nb_atoms, index_j % nb_atoms) for
652
                      (index_i, index_j) in list_edges]
653

    
654
    # Conversion of directed edges set to undirected edges set
655
    if reduce_mode == 'both':
656
        # Extract only edges that are present multiple times
657
        seen = dict()
658
        duplicates = []
659
        for edge in list_edges:
660
            if edge in seen:
661
                duplicates.append(edge)
662
            else:
663
                seen[edge] = None
664
        list_edges = duplicates
665
    elif reduce_mode == 'any':
666
        # Retrieve all unique undirected edges
667
        list_edges = list(set(list_edges))
668

    
669
    return (list_edges)
670

    
671

    
672
# FULL WRAPPER FUNCTION ##
673
def coordination_numbers(list_coords, pbc=True, cell_vectors=np.eye(3),
674
                         continuous=False, generalized=False, edges=True,
675
                         correction='ASANN', parallel=False, reduce_mode=None):
676
    """Computes coordination numbers according to the CASANN algorithm.
677

678
    Parameters:
679
        list_coords (2D array): List of atoms coordinates to
680
        consider. Important: direct coordinates are expected (torus topology of
681
        side 1), unless `pbc` is set to False. Note: This will be converted into
682
        a numpy.ndarray. Shape expected: (nb_atoms, nb_dim), where nb_atoms is
683
        the number of atoms considered, and nb_dim is the dimensionnality of the
684
        system.
685

686
        pbc (bool or str, optional): Determines if periodic boundaries
687
        conditions should be applied. Default to True. If True, coordinates
688
        are interpreted as direct coordinates and the distance between points
689
        is computed as the minimum euclidian distance between all duplicates
690
        (due to periodic boundaries conditions) of these points. If False,
691
        coordinates are interpreted as cartesian coordinates and the metric
692
        used is simply the euclidian distance. To explicitely include all
693
        adjacent periodic images (not only the minimum image convention) set
694
        `pbc` to 'adjacent'. This mode is particularly pertinent for small
695
        enough cells, but increases 9-fold the number of atoms. To
696
        explicitely include all surrounding periodic images set `pbc` to
697
        'full'. This mode is particularly pertinent for very small cells,
698
        but increases 27-fold the number of atoms. Note: This option implies
699
        the use of cell vectors (see `cell` parameter) for the computation of
700
        distance. Default: True.
701

702
        cell_vectors (2D array, optional): List of cell vectors to consider
703
        when periodic boundaries conditions are considered. Note: This will
704
        be converted into a numpy.ndarray. Shape expected: (nb_dim, nb_dim),
705
        where nb_dim is the dimensionnality of the system considered.
706
        Important: For this parameter to be taken into account, `pbc` must be
707
        set to True. Exemple: cell_vectors=[[v1_x, v1_y, v1_z], [v2_x, v2_y,
708
        v2_z], [v3_x, v3_y, v3_z]] Default: numpy.eye(3) (i.e. a cubic cell
709
        of side 1).
710

711
        continuous (bool, optional): If True, computes continuous
712
        coordination numbers. If False, computes discretized coordination
713
        numbers. Default to True. In the discretized version,
714
        the coordination number is equal to the number of detected neighbors.
715
        In the continuous version, each neighbors' contribution to the
716
        coordination number is 1 weighted by SA/SA_max, where SA is the solid
717
        angle corresponding to this neighbor and SA_max is the solid angle
718
        corresponding to the nearest neighbor (i.e. the maximum solid angle
719
        amongs all neighbors detected). Default: False.
720

721
        generalized (bool, optional): If True, computes generalized
722
        coordination numbers, where each neighbor is weighted by its own
723
        coordination number Default: False.
724

725
        edges (bool, optional): If True, computes edges of the connectivity
726
        graph defined by the discretized coordination numbers computed.
727
        Default: True.
728

729
        correction (str, optional): Determines if a correction term should be
730
        used. Default to 'ASANN'. The SANN algorithm suffers
731
        overdetermination of the coordination numbers at interfaces (i.e.
732
        high density gradient) basically because neighbors are always
733
        expected to fill the whole neighboring space (i.e. the sum of solid
734
        angles must be 4π), but at interfaces, neighbors are only expected to
735
        fill a portion of that space depending on the geometry of the
736
        interface. Possible values: - 'ASANN': If `correction` is set to
737
        'ASANN', the total sum of solid angles is rescaled by the ASANN
738
        angular correction term. - 'SC-ASANN': If `correction` is set to
739
        'SC-ASANN', the total sum of solid angles is rescaled by the ASANN
740
        angular correction term, computed in a self-consistent manner.
741
        Arguably better CNs, but more time consuming and less regularized
742
        continuous CNs. - None (or anything else): No correction term
743
        applied. This is equivalent to the SANN algorithm.
744

745
        reduce_mode: Edges counting mode. The ASANN/SANN algorithm can only
746
        find directed edges (e.g. find i->j but not j->i). This parameter
747
        defines the conversion mode from directed to undirected edges.
748
        Possible values: - 'both': An undirected edge (i,j) is present only
749
        if both related directed edges (i->j and j->i) are found. - 'any': An
750
        undirected edge (i,j) is present if any related directed edge (i->j
751
        or j->i) is found. - None: All directed edges are given, no reduction
752
        is performed. Default: None.
753

754
    Returns: (asann_CNs, asann_radius, asann_edges) asann_CNs (numpy array):
755
    list of coordination numbers computed. The order is the same as provided
756
    in list_coords. asann_radius (numpy array): list of coordination radii
757
    computed. The order is the same as provided in list_coords. asann_edges (
758
    list of tuples): list of edges found. Each edge is represented as a tuple
759
    (i,j), meaning that j was found as a neighbor of i. Note: if `edges` is
760
    set to False, asann_edges is set to None.
761

762
    Computational details:
763
        Correction:
764

765
            The SANN algorithm computes the coordination number (i.e. CN) m of
766
            atom i, as the minimum integer m>=3 satisfying R_i_m = sum(r_ij,
767
            j=1..m)/(m-2) < r_i(m+1) (assuming r_ij are sorted: r_i(j-1) <=
768
            r_ij <= r_i(j+1))
769

770
            This formula is equivalent to determining a sphere of radius
771
            R_i_m, such that the sum of all solid angles of inner atoms is 4π.
772
            The solid angle of atom j with respect to atom i, is the solid
773
            angle of the spherical cap of height r_ij on the sphere of radius
774
            R_i_m (i.e. SA_ij = 2π(1-r_ij/R_i_m)). However, at interfaces,
775
            it is expected that neighbors do not fill the whole space (i.e.
776
            the sum of solid angles should not be 4π)
777

778
            Hence we propose here an angular correction which is exact in the
779
            case of infinitely evenly distributed neighbors along a spherical
780
            cap. Indeed, assuming that a normal vector can be meaningfully
781
            defined at the interface, a correction is needed when the
782
            neighbors barycenter is far from the central atom.
783

784
            Therefore, the neighbors barycenter is computed. From this
785
            barycenter, one deduces the corresponding spherical cap on the
786
            sphere of radius R_i_m. Its solid angle is then taken instead of
787
            4π.
788

789
            Consequently, this angular correction assumes a spherical
790
            cap-like distribution of the nearest neighbors.
791

792
        Continuous:
793

794
            The continuous coordination number algorithm scales the
795
            contributions of each neighbor by the following weight :
796
            SA/SA_max (where SA is the solid angle associated with the
797
            corresponding neighbor, and SA_max is the maximum solid angle (
798
            i.e. the solid angle associated with the nearest neighbor)) The
799
            continuous coordination number is then the sum of all weights.
800

801
        Generalized:
802

803
            The generalized coordination number algorithm scales the
804
            contributions of each neighbor by the following weight :
805
            CN/CN_max (where CN is the coordination number associated with
806
            the corresponding neighbor, and CN_max is the maximum
807
            coordination number (i.e. the coordination number associated with
808
            the most coordinated neighbor)) The generalized coordination
809
            number is then the sum of all weights (weighted futhermore,
810
            or not, by the continuous algorithm).
811
    """
812

    
813
    # Conversion to numpy.ndarray
814
    coords = np.array(list_coords)
815
    if pbc:
816
        cell = np.array(cell_vectors)
817
    else:
818
        cell = None
819
    nb_atoms = None
820

    
821
    # Retrieve parameters and check dimension consistency
822
    assert ((not pbc) or (coords.shape[1] == cell.shape[0] == cell.shape[1]))
823

    
824
    # Explicitely add adjacent periodic images if requested
825
    if pbc in ('adjacent', 'full'):
826
        nb_atoms, coords = add_periodic_images(coords, cell, pbc)
827

    
828
    # Retrieve distance-sorted pairwise distances, vectors and indexes
829
    sorted_distances, sorted_vectors, sorted_indexes = get_sorted_distances(
830
        coords, pbc, nb_atoms=nb_atoms, cell=cell)
831

    
832
    # Retrieve number of nearest neighbors and coordination radius with SANN
833
    # algorithm
834
    asann_CNs, asann_radius = get_SANN(sorted_distances)
835

    
836
    # Apply angular correction if requested
837
    if correction == 'ASANN':
838
        asann_CNs, asann_radius, vectors = get_ASANN(sorted_distances,
839
                                                     sorted_vectors, asann_CNs,
840
                                                     asann_radius)
841
    elif correction == 'SC-ASANN':
842
        asann_CNs, asann_radius, vectors = get_self_consistent_ASANN(
843
            sorted_distances, sorted_vectors, asann_CNs)
844

    
845
    # Compute edges
846
    if edges:
847
        asann_edges = get_edges(asann_CNs, sorted_indexes,
848
                                reduce_mode=reduce_mode, nb_atoms=nb_atoms)
849
    else:
850
        asann_edges = None
851

    
852
    # Convert coordination numbers into continuous values if requested
853
    if continuous:
854
        # Compute continuous CN by weighting each neighbor contribution
855
        asann_CNs, list_weights = convert_continuous(asann_CNs, asann_radius,
856
                                                     sorted_distances)
857
    elif generalized:
858
        list_weights = [[1] * asann_CN for asann_CN in asann_CNs]
859

    
860
    # Compute generalized coordination numbers if requested
861
    if generalized:
862
        asann_CNs = get_generalized(asann_CNs, list_weights, sorted_indexes)
863

    
864
    return asann_CNs, asann_radius, asann_edges, vectors
865

    
866

    
867
# Program being executed when used as a script (instead of a module)
868
if __name__ == '__main__':
869
    # Imports
870
    import sys
871

    
872
    # Import local structure reader sub-module
873
    try:
874
        from structure_reader import structure_from_file
875
    except ImportError as err:
876
        print(
877
            'Unable to find file structure_reader.py, which allows reading '
878
            'molecular structures. Aborting.',
879
            file=sys.stderr)
880
        print(
881
            'Plase copy structure_reader.py in either the same folder '
882
            'containing this script ({}), or in your working '
883
            'directory'.format(
884
                sys.argv[0]), file=sys.stderr)
885

    
886
        raise err
887

    
888
    # Retrieve filename of molecular structure to treat
889
    try:
890
        filename = sys.argv[1]
891
    except IndexError as err:
892
        print(
893
            'Unable to parse a filename to treat (containing coordinates in '
894
            'supported format: XYZ, POSCAR, CIF, ...)',
895
            file=sys.stderr)
896
        print('Usage: {} filename'.format(sys.argv[0]), file=sys.stderr)
897
        raise err
898

    
899
    # Read file structure
900
    file_structure = structure_from_file(filename)
901
    coordinates = file_structure.get_coords()
902
    cell_vectors = file_structure.get_cell_matrix()
903
    pbc_mode = file_structure.pbc_enabled
904

    
905
    # Explicitely add next periodic images if number of atoms is too low. A
906
    # simple modulo operation is not enough when a single point is found as
907
    # neighbor more than once (as part of periodic images)
908
    if pbc_mode and len(coordinates) < 100:
909
        pbc_mode = 'full'  # Explictely include all 27 next periodic cells
910
    elif pbc_mode and len(coordinates) < 1000:
911
        pbc_mode = 'adjacent'  # Explicitely include all 9 adjacent cells
912

    
913
    asann_CNs, asann_radii, asann_edges, vectors = coordination_numbers(
914
        coordinates, pbc=pbc_mode, cell_vectors=cell_vectors, continuous=False,
915
        generalized=False, edges=True, correction='ASANN')
916

    
917
    print('ASANN vectors')
918
    for l in vectors:
919
        print(-l[0], -l[1], -l[2])