Coverage for /builds/ase/ase/ase/ga/ofp_comparator.py : 48.11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numpy as np
2from itertools import combinations_with_replacement
3from math import erf
4from scipy.spatial.distance import cdist
5from ase.neighborlist import NeighborList
6from ase.utils import pbc2pbc
9class OFPComparator:
10 """Implementation of comparison using Oganov's fingerprint (OFP)
11 functions, based on:
13 * :doi:`Oganov, Valle, J. Chem. Phys. 130, 104504 (2009)
14 <10.1063/1.3079326>`
16 * :doi:`Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632
17 <10.1016/j.cpc.2010.06.007>`
19 Parameters:
21 n_top: int or None
22 The number of atoms to optimize (None = include all).
24 dE: float
25 Energy difference above which two structures are
26 automatically considered to be different. (Default 1 eV)
28 cos_dist_max: float
29 Maximal cosine distance between two structures in
30 order to be still considered the same structure. Default 5e-3
32 rcut: float
33 Cutoff radius in Angstrom for the fingerprints.
34 (Default 20 Angstrom)
36 binwidth: float
37 Width in Angstrom of the bins over which the fingerprints
38 are discretized. (Default 0.05 Angstrom)
40 pbc: list of three booleans or None
41 Specifies whether to apply periodic boundary conditions
42 along each of the three unit cell vectors when calculating
43 the fingerprint. The default (None) is to apply PBCs in all
44 3 directions.
46 Note: for isolated systems (pbc = [False, False, False]),
47 the pair correlation function itself is always short-ranged
48 (decays to zero beyond a certain radius), so unity is not
49 subtracted for calculating the fingerprint. Also the
50 volume normalization disappears.
52 maxdims: list of three floats or None
53 If PBCs in only 1 or 2 dimensions are specified, the
54 maximal thicknesses along the non-periodic directions can
55 be specified here (the values given for the periodic
56 directions will not be used). If set to None (the
57 default), the length of the cell vector along the
58 non-periodic direction is used.
60 Note: in this implementation, the cell vectors are
61 assumed to be orthogonal.
63 sigma: float
64 Standard deviation of the gaussian smearing to be applied
65 in the calculation of the fingerprints (in
66 Angstrom). Default 0.02 Angstrom.
68 nsigma: int
69 Distance (as the number of standard deviations sigma) at
70 which the gaussian smearing is cut off (i.e. no smearing
71 beyond that distance). (Default 4)
73 recalculate: boolean
74 If True, ignores the fingerprints stored in
75 atoms.info and recalculates them. (Default False)
77 """
79 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20.,
80 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True,
81 maxdims=None, recalculate=False):
82 self.n_top = n_top or 0
83 self.dE = dE
84 self.cos_dist_max = cos_dist_max
85 self.rcut = rcut
86 self.binwidth = binwidth
87 self.pbc = pbc2pbc(pbc)
89 if maxdims is None:
90 self.maxdims = [None] * 3
91 else:
92 self.maxdims = maxdims
94 self.sigma = sigma
95 self.nsigma = nsigma
96 self.recalculate = recalculate
97 self.dimensions = self.pbc.sum()
99 if self.dimensions == 1 or self.dimensions == 2:
100 for direction in range(3):
101 if not self.pbc[direction]:
102 if self.maxdims[direction] is not None:
103 if self.maxdims[direction] <= 0:
104 e = '''If a max thickness is specificed in maxdims
105 for a non-periodic direction, it has to be
106 strictly positive.'''
107 raise ValueError(e)
109 def looks_like(self, a1, a2):
110 """ Return if structure a1 or a2 are similar or not. """
111 if len(a1) != len(a2):
112 raise Exception('The two configurations are not the same size.')
114 # first we check the energy criteria
115 if a1.calc is not None and a2.calc is not None:
116 dE = abs(a1.get_potential_energy() - a2.get_potential_energy())
117 if dE >= self.dE:
118 return False
120 # then we check the structure
121 cos_dist = self._compare_structure(a1, a2)
122 verdict = cos_dist < self.cos_dist_max
123 return verdict
125 def _json_encode(self, fingerprints, typedic):
126 """ json does not accept tuples nor integers as dict keys,
127 so in order to write the fingerprints to atoms.info, we need
128 to convert them to strings """
129 fingerprints_encoded = {}
130 for key, val in fingerprints.items():
131 try:
132 newkey = "_".join(map(str, list(key)))
133 except TypeError:
134 newkey = str(key)
135 if isinstance(val, dict):
136 fingerprints_encoded[newkey] = {}
137 for key2, val2 in val.items():
138 fingerprints_encoded[newkey][str(key2)] = val2
139 else:
140 fingerprints_encoded[newkey] = val
141 typedic_encoded = {}
142 for key, val in typedic.items():
143 newkey = str(key)
144 typedic_encoded[newkey] = val
145 return [fingerprints_encoded, typedic_encoded]
147 def _json_decode(self, fingerprints, typedic):
148 """ This is the reverse operation of _json_encode """
149 fingerprints_decoded = {}
150 for key, val in fingerprints.items():
151 newkey = list(map(int, key.split("_")))
152 if len(newkey) > 1:
153 newkey = tuple(newkey)
154 else:
155 newkey = newkey[0]
157 if isinstance(val, dict):
158 fingerprints_decoded[newkey] = {}
159 for key2, val2 in val.items():
160 fingerprints_decoded[newkey][int(key2)] = np.array(val2)
161 else:
162 fingerprints_decoded[newkey] = np.array(val)
163 typedic_decoded = {}
164 for key, val in typedic.items():
165 newkey = int(key)
166 typedic_decoded[newkey] = val
167 return [fingerprints_decoded, typedic_decoded]
169 def _compare_structure(self, a1, a2):
170 """ Returns the cosine distance between the two structures,
171 using their fingerprints. """
173 if len(a1) != len(a2):
174 raise Exception('The two configurations are not the same size.')
176 a1top = a1[-self.n_top:]
177 a2top = a2[-self.n_top:]
179 if 'fingerprints' in a1.info and not self.recalculate:
180 fp1, typedic1 = a1.info['fingerprints']
181 fp1, typedic1 = self._json_decode(fp1, typedic1)
182 else:
183 fp1, typedic1 = self._take_fingerprints(a1top)
184 a1.info['fingerprints'] = self._json_encode(fp1, typedic1)
186 if 'fingerprints' in a2.info and not self.recalculate:
187 fp2, typedic2 = a2.info['fingerprints']
188 fp2, typedic2 = self._json_decode(fp2, typedic2)
189 else:
190 fp2, typedic2 = self._take_fingerprints(a2top)
191 a2.info['fingerprints'] = self._json_encode(fp2, typedic2)
193 if sorted(fp1) != sorted(fp2):
194 raise AssertionError('The two structures have fingerprints '
195 'with different compounds.')
196 for key in typedic1:
197 if not np.array_equal(typedic1[key], typedic2[key]):
198 raise AssertionError('The two structures have a different '
199 'stoichiometry or ordering!')
201 cos_dist = self._cosine_distance(fp1, fp2, typedic1)
202 return cos_dist
204 def _get_volume(self, a):
205 ''' Calculates the normalizing value, and other parameters
206 (pmin,pmax,qmin,qmax) that are used for surface area calculation
207 in the case of 1 or 2-D periodicity.'''
209 cell = a.get_cell()
210 scalpos = a.get_scaled_positions()
212 # defaults:
213 volume = 1.
214 pmin, pmax, qmin, qmax = [0.] * 4
216 if self.dimensions == 1 or self.dimensions == 2:
217 for direction in range(3):
218 if not self.pbc[direction]:
219 if self.maxdims[direction] is None:
220 maxdim = np.linalg.norm(cell[direction, :])
221 self.maxdims[direction] = maxdim
223 pbc_dirs = [i for i in range(3) if self.pbc[i]]
224 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]
226 if self.dimensions == 3:
227 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :]))
229 elif self.dimensions == 2:
230 non_pbc_dir = non_pbc_dirs[0]
232 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :])
233 b = self.maxdims[non_pbc_dir]
234 b /= np.linalg.norm(cell[non_pbc_dir, :])
236 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :]))
238 maxpos = np.max(scalpos[:, non_pbc_dir])
239 minpos = np.min(scalpos[:, non_pbc_dir])
240 pwidth = maxpos - minpos
241 pmargin = 0.5 * (b - pwidth)
242 # note: here is a place where we assume that the
243 # non-periodic direction is orthogonal to the periodic ones:
244 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin
245 pmin *= np.linalg.norm(cell[non_pbc_dir, :])
246 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin
247 pmax *= np.linalg.norm(cell[non_pbc_dir, :])
249 elif self.dimensions == 1:
250 pbc_dir = pbc_dirs[0]
252 v0 = cell[non_pbc_dirs[0], :]
253 b0 = self.maxdims[non_pbc_dirs[0]]
254 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :])
255 v1 = cell[non_pbc_dirs[1], :]
256 b1 = self.maxdims[non_pbc_dirs[1]]
257 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :])
259 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1),
260 cell[pbc_dir, :]))
262 # note: here is a place where we assume that the
263 # non-periodic direction is orthogonal to the periodic ones:
264 maxpos = np.max(scalpos[:, non_pbc_dirs[0]])
265 minpos = np.min(scalpos[:, non_pbc_dirs[0]])
266 pwidth = maxpos - minpos
267 pmargin = 0.5 * (b0 - pwidth)
269 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin
270 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :])
271 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin
272 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :])
274 maxpos = np.max(scalpos[:, non_pbc_dirs[1]])
275 minpos = np.min(scalpos[:, non_pbc_dirs[1]])
276 qwidth = maxpos - minpos
277 qmargin = 0.5 * (b1 - qwidth)
279 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin
280 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :])
281 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin
282 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :])
284 elif self.dimensions == 0:
285 volume = 1.
287 return [volume, pmin, pmax, qmin, qmax]
289 def _take_fingerprints(self, atoms, individual=False):
290 """ Returns a [fingerprints,typedic] list, where fingerprints
291 is a dictionary with the fingerprints, and typedic is a
292 dictionary with the list of atom indices for each element
293 (or "type") in the atoms object.
294 The keys in the fingerprints dictionary are the (A,B) tuples,
295 which are the different element-element combinations in the
296 atoms object (A and B are the atomic numbers).
297 When A != B, the (A,B) tuple is sorted (A < B).
299 If individual=True, a dict is returned, where each atom index
300 has an {atomic_number:fingerprint} dict as value.
301 If individual=False, the fingerprints from atoms of the same
302 atomic number are added together."""
304 pos = atoms.get_positions()
305 num = atoms.get_atomic_numbers()
306 cell = atoms.get_cell()
308 unique_types = np.unique(num)
309 posdic = {}
310 typedic = {}
311 for t in unique_types:
312 tlist = [i for i, atom in enumerate(atoms) if atom.number == t]
313 typedic[t] = tlist
314 posdic[t] = pos[tlist]
316 # determining the volume normalization and other parameters
317 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms)
319 # functions for calculating the surface area
320 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]
322 def surface_area_0d(r):
323 return 4 * np.pi * (r**2)
325 def surface_area_1d(r, pos):
326 q0 = pos[non_pbc_dirs[1]]
327 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real
328 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real
329 factor = 1 - (phi1 + phi2) / np.pi
330 return surface_area_2d(r, pos) * factor
332 def surface_area_2d(r, pos):
333 p0 = pos[non_pbc_dirs[0]]
334 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r)
335 area *= 2 * np.pi * r
336 return area
338 def surface_area_3d(r):
339 return 4 * np.pi * (r**2)
341 # build neighborlist
342 # this is computationally the most intensive part
343 a = atoms.copy()
344 a.set_pbc(self.pbc)
345 nl = NeighborList([self.rcut / 2.] * len(a), skin=0.,
346 self_interaction=False, bothways=True)
347 nl.update(a)
349 # parameters for the binning:
350 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth))
351 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma
352 smearing_norm = erf(x)
353 nbins = int(np.ceil(self.rcut * 1. / self.binwidth))
354 bindist = self.binwidth * np.arange(1, nbins + 1)
356 def take_individual_rdf(index, unique_type):
357 # Computes the radial distribution function of atoms
358 # of type unique_type around the atom with index "index".
359 rdf = np.zeros(nbins)
361 if self.dimensions == 3:
362 weights = 1. / surface_area_3d(bindist)
363 elif self.dimensions == 2:
364 weights = 1. / surface_area_2d(bindist, pos[index])
365 elif self.dimensions == 1:
366 weights = 1. / surface_area_1d(bindist, pos[index])
367 elif self.dimensions == 0:
368 weights = 1. / surface_area_0d(bindist)
369 weights /= self.binwidth
371 indices, offsets = nl.get_neighbors(index)
372 valid = np.where(num[indices] == unique_type)
373 p = pos[indices[valid]] + np.dot(offsets[valid], cell)
374 r = cdist(p, [pos[index]])
375 bins = np.floor(r / self.binwidth)
377 for i in range(-m, m + 1):
378 newbins = bins + i
379 valid = np.where((newbins >= 0) & (newbins < nbins))
380 valid_bins = newbins[valid].astype(int)
381 values = weights[valid_bins]
383 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma
384 values *= 0.5 * erf(c * (2 * i + 1)) - \
385 0.5 * erf(c * (2 * i - 1))
386 values /= smearing_norm
388 for j, valid_bin in enumerate(valid_bins):
389 rdf[valid_bin] += values[j]
391 rdf /= len(typedic[unique_type]) * 1. / volume
392 return rdf
394 fingerprints = {}
395 if individual:
396 for i in range(len(atoms)):
397 fingerprints[i] = {}
398 for unique_type in unique_types:
399 fingerprint = take_individual_rdf(i, unique_type)
400 if self.dimensions > 0:
401 fingerprint -= 1
402 fingerprints[i][unique_type] = fingerprint
403 else:
404 for t1, t2 in combinations_with_replacement(unique_types, r=2):
405 key = (t1, t2)
406 fingerprint = np.zeros(nbins)
407 for i in typedic[t1]:
408 fingerprint += take_individual_rdf(i, t2)
409 fingerprint /= len(typedic[t1])
410 if self.dimensions > 0:
411 fingerprint -= 1
412 fingerprints[key] = fingerprint
414 return [fingerprints, typedic]
416 def _calculate_local_orders(self, individual_fingerprints, typedic,
417 volume):
418 """ Returns a list with the local order for every atom,
419 using the definition of local order from
420 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632
421 :doi:`10.1016/j.cpc.2010.06.007`"""
423 # total number of atoms:
424 n_tot = sum([len(typedic[key]) for key in typedic])
426 local_orders = []
427 for index, fingerprints in individual_fingerprints.items():
428 local_order = 0
429 for unique_type, fingerprint in fingerprints.items():
430 term = np.linalg.norm(fingerprint)**2
431 term *= self.binwidth
432 term *= (volume * 1. / n_tot)**3
433 term *= len(typedic[unique_type]) * 1. / n_tot
434 local_order += term
435 local_orders.append(np.sqrt(local_order))
437 return local_orders
439 def get_local_orders(self, a):
440 """ Returns the local orders of all the atoms."""
442 a_top = a[-self.n_top:]
443 key = 'individual_fingerprints'
445 if key in a.info and not self.recalculate:
446 fp, typedic = self._json_decode(*a.info[key])
447 else:
448 fp, typedic = self._take_fingerprints(a_top, individual=True)
449 a.info[key] = self._json_encode(fp, typedic)
451 volume, pmin, pmax, qmin, qmax = self._get_volume(a_top)
452 return self._calculate_local_orders(fp, typedic, volume)
454 def _cosine_distance(self, fp1, fp2, typedic):
455 """ Returns the cosine distance from two fingerprints.
456 It also needs information about the number of atoms from
457 each element, which is included in "typedic"."""
459 keys = sorted(fp1)
461 # calculating the weights:
462 w = {}
463 wtot = 0
464 for key in keys:
465 weight = len(typedic[key[0]]) * len(typedic[key[1]])
466 wtot += weight
467 w[key] = weight
468 for key in keys:
469 w[key] *= 1. / wtot
471 # calculating the fingerprint norms:
472 norm1 = 0
473 norm2 = 0
474 for key in keys:
475 norm1 += (np.linalg.norm(fp1[key])**2) * w[key]
476 norm2 += (np.linalg.norm(fp2[key])**2) * w[key]
477 norm1 = np.sqrt(norm1)
478 norm2 = np.sqrt(norm2)
480 # calculating the distance:
481 distance = 0
482 for key in keys:
483 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2)
485 distance = 0.5 * (1 - distance)
486 return distance
488 def plot_fingerprints(self, a, prefix=''):
489 """ Function for quickly plotting all the fingerprints.
490 Prefix = a prefix you want to give to the resulting PNG file."""
491 try:
492 import matplotlib.pyplot as plt
493 except ImportError:
494 Warning("Matplotlib could not be loaded - plotting won't work")
495 raise
497 if 'fingerprints' in a.info and not self.recalculate:
498 fp, typedic = a.info['fingerprints']
499 fp, typedic = self._json_decode(fp, typedic)
500 else:
501 a_top = a[-self.n_top:]
502 fp, typedic = self._take_fingerprints(a_top)
503 a.info['fingerprints'] = self._json_encode(fp, typedic)
505 npts = int(np.ceil(self.rcut * 1. / self.binwidth))
506 x = np.linspace(0, self.rcut, npts, endpoint=False)
508 for key, val in fp.items():
509 plt.plot(x, val)
510 suffix = "_fp_{0}_{1}.png".format(key[0], key[1])
511 plt.savefig(prefix + suffix)
512 plt.clf()
514 def plot_individual_fingerprints(self, a, prefix=''):
515 """ Function for plotting all the individual fingerprints.
516 Prefix = a prefix for the resulting PNG file."""
517 try:
518 import matplotlib.pyplot as plt
519 except ImportError:
520 Warning("Matplotlib could not be loaded - plotting won't work")
521 raise
523 if 'individual_fingerprints' in a.info and not self.recalculate:
524 fp, typedic = a.info['individual_fingerprints']
525 else:
526 a_top = a[-self.n_top:]
527 fp, typedic = self._take_fingerprints(a_top, individual=True)
528 a.info['individual_fingerprints'] = [fp, typedic]
530 npts = int(np.ceil(self.rcut * 1. / self.binwidth))
531 x = np.linspace(0, self.rcut, npts, endpoint=False)
533 for key, val in fp.items():
534 for key2, val2 in val.items():
535 plt.plot(x, val2)
536 plt.ylim([-1, 10])
537 suffix = "_individual_fp_{0}_{1}.png".format(key, key2)
538 plt.savefig(prefix + suffix)
539 plt.clf()