r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import numpy as np

2from itertools import combinations_with_replacement

3from math import erf

4from scipy.spatial.distance import cdist

5from ase.neighborlist import NeighborList

6from ase.utils import pbc2pbc

9class OFPComparator:

10 """Implementation of comparison using Oganov's fingerprint (OFP)

11 functions, based on:

13 * :doi:`Oganov, Valle, J. Chem. Phys. 130, 104504 (2009)

14 <10.1063/1.3079326>`

16 * :doi:`Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632

17 <10.1016/j.cpc.2010.06.007>`

19 Parameters:

21 n_top: int or None

22 The number of atoms to optimize (None = include all).

24 dE: float

25 Energy difference above which two structures are

26 automatically considered to be different. (Default 1 eV)

28 cos_dist_max: float

29 Maximal cosine distance between two structures in

30 order to be still considered the same structure. Default 5e-3

32 rcut: float

33 Cutoff radius in Angstrom for the fingerprints.

34 (Default 20 Angstrom)

36 binwidth: float

37 Width in Angstrom of the bins over which the fingerprints

38 are discretized. (Default 0.05 Angstrom)

40 pbc: list of three booleans or None

41 Specifies whether to apply periodic boundary conditions

42 along each of the three unit cell vectors when calculating

43 the fingerprint. The default (None) is to apply PBCs in all

44 3 directions.

46 Note: for isolated systems (pbc = [False, False, False]),

47 the pair correlation function itself is always short-ranged

48 (decays to zero beyond a certain radius), so unity is not

49 subtracted for calculating the fingerprint. Also the

50 volume normalization disappears.

52 maxdims: list of three floats or None

53 If PBCs in only 1 or 2 dimensions are specified, the

54 maximal thicknesses along the non-periodic directions can

55 be specified here (the values given for the periodic

56 directions will not be used). If set to None (the

57 default), the length of the cell vector along the

58 non-periodic direction is used.

60 Note: in this implementation, the cell vectors are

61 assumed to be orthogonal.

63 sigma: float

64 Standard deviation of the gaussian smearing to be applied

65 in the calculation of the fingerprints (in

66 Angstrom). Default 0.02 Angstrom.

68 nsigma: int

69 Distance (as the number of standard deviations sigma) at

70 which the gaussian smearing is cut off (i.e. no smearing

71 beyond that distance). (Default 4)

73 recalculate: boolean

74 If True, ignores the fingerprints stored in

75 atoms.info and recalculates them. (Default False)

77 """

79 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20.,

80 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True,

81 maxdims=None, recalculate=False):

82 self.n_top = n_top or 0

83 self.dE = dE

84 self.cos_dist_max = cos_dist_max

85 self.rcut = rcut

86 self.binwidth = binwidth

87 self.pbc = pbc2pbc(pbc)

89 if maxdims is None:

90 self.maxdims = [None] * 3

91 else:

92 self.maxdims = maxdims

94 self.sigma = sigma

95 self.nsigma = nsigma

96 self.recalculate = recalculate

97 self.dimensions = self.pbc.sum()

99 if self.dimensions == 1 or self.dimensions == 2:

100 for direction in range(3):

101 if not self.pbc[direction]:

102 if self.maxdims[direction] is not None:

103 if self.maxdims[direction] <= 0:

104 e = '''If a max thickness is specificed in maxdims

105 for a non-periodic direction, it has to be

106 strictly positive.'''

107 raise ValueError(e)

109 def looks_like(self, a1, a2):

110 """ Return if structure a1 or a2 are similar or not. """

111 if len(a1) != len(a2):

112 raise Exception('The two configurations are not the same size.')

114 # first we check the energy criteria

115 if a1.calc is not None and a2.calc is not None:

116 dE = abs(a1.get_potential_energy() - a2.get_potential_energy())

117 if dE >= self.dE:

118 return False

120 # then we check the structure

121 cos_dist = self._compare_structure(a1, a2)

122 verdict = cos_dist < self.cos_dist_max

123 return verdict

125 def _json_encode(self, fingerprints, typedic):

126 """ json does not accept tuples nor integers as dict keys,

127 so in order to write the fingerprints to atoms.info, we need

128 to convert them to strings """

129 fingerprints_encoded = {}

130 for key, val in fingerprints.items():

131 try:

132 newkey = "_".join(map(str, list(key)))

133 except TypeError:

134 newkey = str(key)

135 if isinstance(val, dict):

136 fingerprints_encoded[newkey] = {}

137 for key2, val2 in val.items():

138 fingerprints_encoded[newkey][str(key2)] = val2

139 else:

140 fingerprints_encoded[newkey] = val

141 typedic_encoded = {}

142 for key, val in typedic.items():

143 newkey = str(key)

144 typedic_encoded[newkey] = val

145 return [fingerprints_encoded, typedic_encoded]

147 def _json_decode(self, fingerprints, typedic):

148 """ This is the reverse operation of _json_encode """

149 fingerprints_decoded = {}

150 for key, val in fingerprints.items():

151 newkey = list(map(int, key.split("_")))

152 if len(newkey) > 1:

153 newkey = tuple(newkey)

154 else:

155 newkey = newkey[0]

157 if isinstance(val, dict):

158 fingerprints_decoded[newkey] = {}

159 for key2, val2 in val.items():

160 fingerprints_decoded[newkey][int(key2)] = np.array(val2)

161 else:

162 fingerprints_decoded[newkey] = np.array(val)

163 typedic_decoded = {}

164 for key, val in typedic.items():

165 newkey = int(key)

166 typedic_decoded[newkey] = val

167 return [fingerprints_decoded, typedic_decoded]

169 def _compare_structure(self, a1, a2):

170 """ Returns the cosine distance between the two structures,

171 using their fingerprints. """

173 if len(a1) != len(a2):

174 raise Exception('The two configurations are not the same size.')

176 a1top = a1[-self.n_top:]

177 a2top = a2[-self.n_top:]

179 if 'fingerprints' in a1.info and not self.recalculate:

180 fp1, typedic1 = a1.info['fingerprints']

181 fp1, typedic1 = self._json_decode(fp1, typedic1)

182 else:

183 fp1, typedic1 = self._take_fingerprints(a1top)

184 a1.info['fingerprints'] = self._json_encode(fp1, typedic1)

186 if 'fingerprints' in a2.info and not self.recalculate:

187 fp2, typedic2 = a2.info['fingerprints']

188 fp2, typedic2 = self._json_decode(fp2, typedic2)

189 else:

190 fp2, typedic2 = self._take_fingerprints(a2top)

191 a2.info['fingerprints'] = self._json_encode(fp2, typedic2)

193 if sorted(fp1) != sorted(fp2):

194 raise AssertionError('The two structures have fingerprints '

195 'with different compounds.')

196 for key in typedic1:

197 if not np.array_equal(typedic1[key], typedic2[key]):

198 raise AssertionError('The two structures have a different '

199 'stoichiometry or ordering!')

201 cos_dist = self._cosine_distance(fp1, fp2, typedic1)

202 return cos_dist

204 def _get_volume(self, a):

205 ''' Calculates the normalizing value, and other parameters

206 (pmin,pmax,qmin,qmax) that are used for surface area calculation

207 in the case of 1 or 2-D periodicity.'''

209 cell = a.get_cell()

210 scalpos = a.get_scaled_positions()

212 # defaults:

213 volume = 1.

214 pmin, pmax, qmin, qmax = [0.] * 4

216 if self.dimensions == 1 or self.dimensions == 2:

217 for direction in range(3):

218 if not self.pbc[direction]:

219 if self.maxdims[direction] is None:

220 maxdim = np.linalg.norm(cell[direction, :])

221 self.maxdims[direction] = maxdim

223 pbc_dirs = [i for i in range(3) if self.pbc[i]]

224 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]

226 if self.dimensions == 3:

227 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :]))

229 elif self.dimensions == 2:

230 non_pbc_dir = non_pbc_dirs[0]

232 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :])

233 b = self.maxdims[non_pbc_dir]

234 b /= np.linalg.norm(cell[non_pbc_dir, :])

236 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :]))

238 maxpos = np.max(scalpos[:, non_pbc_dir])

239 minpos = np.min(scalpos[:, non_pbc_dir])

240 pwidth = maxpos - minpos

241 pmargin = 0.5 * (b - pwidth)

242 # note: here is a place where we assume that the

243 # non-periodic direction is orthogonal to the periodic ones:

244 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin

245 pmin *= np.linalg.norm(cell[non_pbc_dir, :])

246 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin

247 pmax *= np.linalg.norm(cell[non_pbc_dir, :])

249 elif self.dimensions == 1:

250 pbc_dir = pbc_dirs[0]

252 v0 = cell[non_pbc_dirs[0], :]

253 b0 = self.maxdims[non_pbc_dirs[0]]

254 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :])

255 v1 = cell[non_pbc_dirs[1], :]

256 b1 = self.maxdims[non_pbc_dirs[1]]

257 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :])

259 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1),

260 cell[pbc_dir, :]))

262 # note: here is a place where we assume that the

263 # non-periodic direction is orthogonal to the periodic ones:

264 maxpos = np.max(scalpos[:, non_pbc_dirs[0]])

265 minpos = np.min(scalpos[:, non_pbc_dirs[0]])

266 pwidth = maxpos - minpos

267 pmargin = 0.5 * (b0 - pwidth)

269 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin

270 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :])

271 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin

272 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :])

274 maxpos = np.max(scalpos[:, non_pbc_dirs[1]])

275 minpos = np.min(scalpos[:, non_pbc_dirs[1]])

276 qwidth = maxpos - minpos

277 qmargin = 0.5 * (b1 - qwidth)

279 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin

280 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :])

281 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin

282 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :])

284 elif self.dimensions == 0:

285 volume = 1.

287 return [volume, pmin, pmax, qmin, qmax]

289 def _take_fingerprints(self, atoms, individual=False):

290 """ Returns a [fingerprints,typedic] list, where fingerprints

291 is a dictionary with the fingerprints, and typedic is a

292 dictionary with the list of atom indices for each element

293 (or "type") in the atoms object.

294 The keys in the fingerprints dictionary are the (A,B) tuples,

295 which are the different element-element combinations in the

296 atoms object (A and B are the atomic numbers).

297 When A != B, the (A,B) tuple is sorted (A < B).

299 If individual=True, a dict is returned, where each atom index

300 has an {atomic_number:fingerprint} dict as value.

301 If individual=False, the fingerprints from atoms of the same

302 atomic number are added together."""

304 pos = atoms.get_positions()

305 num = atoms.get_atomic_numbers()

306 cell = atoms.get_cell()

308 unique_types = np.unique(num)

309 posdic = {}

310 typedic = {}

311 for t in unique_types:

312 tlist = [i for i, atom in enumerate(atoms) if atom.number == t]

313 typedic[t] = tlist

314 posdic[t] = pos[tlist]

316 # determining the volume normalization and other parameters

317 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms)

319 # functions for calculating the surface area

320 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]

322 def surface_area_0d(r):

323 return 4 * np.pi * (r**2)

325 def surface_area_1d(r, pos):

326 q0 = pos[non_pbc_dirs[1]]

327 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real

328 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real

329 factor = 1 - (phi1 + phi2) / np.pi

330 return surface_area_2d(r, pos) * factor

332 def surface_area_2d(r, pos):

333 p0 = pos[non_pbc_dirs[0]]

334 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r)

335 area *= 2 * np.pi * r

336 return area

338 def surface_area_3d(r):

339 return 4 * np.pi * (r**2)

341 # build neighborlist

342 # this is computationally the most intensive part

343 a = atoms.copy()

344 a.set_pbc(self.pbc)

345 nl = NeighborList([self.rcut / 2.] * len(a), skin=0.,

346 self_interaction=False, bothways=True)

347 nl.update(a)

349 # parameters for the binning:

350 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth))

351 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma

352 smearing_norm = erf(x)

353 nbins = int(np.ceil(self.rcut * 1. / self.binwidth))

354 bindist = self.binwidth * np.arange(1, nbins + 1)

356 def take_individual_rdf(index, unique_type):

357 # Computes the radial distribution function of atoms

358 # of type unique_type around the atom with index "index".

359 rdf = np.zeros(nbins)

361 if self.dimensions == 3:

362 weights = 1. / surface_area_3d(bindist)

363 elif self.dimensions == 2:

364 weights = 1. / surface_area_2d(bindist, pos[index])

365 elif self.dimensions == 1:

366 weights = 1. / surface_area_1d(bindist, pos[index])

367 elif self.dimensions == 0:

368 weights = 1. / surface_area_0d(bindist)

369 weights /= self.binwidth

371 indices, offsets = nl.get_neighbors(index)

372 valid = np.where(num[indices] == unique_type)

373 p = pos[indices[valid]] + np.dot(offsets[valid], cell)

374 r = cdist(p, [pos[index]])

375 bins = np.floor(r / self.binwidth)

377 for i in range(-m, m + 1):

378 newbins = bins + i

379 valid = np.where((newbins >= 0) & (newbins < nbins))

380 valid_bins = newbins[valid].astype(int)

381 values = weights[valid_bins]

383 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma

384 values *= 0.5 * erf(c * (2 * i + 1)) - \

385 0.5 * erf(c * (2 * i - 1))

386 values /= smearing_norm

388 for j, valid_bin in enumerate(valid_bins):

389 rdf[valid_bin] += values[j]

391 rdf /= len(typedic[unique_type]) * 1. / volume

392 return rdf

394 fingerprints = {}

395 if individual:

396 for i in range(len(atoms)):

397 fingerprints[i] = {}

398 for unique_type in unique_types:

399 fingerprint = take_individual_rdf(i, unique_type)

400 if self.dimensions > 0:

401 fingerprint -= 1

402 fingerprints[i][unique_type] = fingerprint

403 else:

404 for t1, t2 in combinations_with_replacement(unique_types, r=2):

405 key = (t1, t2)

406 fingerprint = np.zeros(nbins)

407 for i in typedic[t1]:

408 fingerprint += take_individual_rdf(i, t2)

409 fingerprint /= len(typedic[t1])

410 if self.dimensions > 0:

411 fingerprint -= 1

412 fingerprints[key] = fingerprint

414 return [fingerprints, typedic]

416 def _calculate_local_orders(self, individual_fingerprints, typedic,

417 volume):

418 """ Returns a list with the local order for every atom,

419 using the definition of local order from

420 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632

421 :doi:`10.1016/j.cpc.2010.06.007`"""

423 # total number of atoms:

424 n_tot = sum([len(typedic[key]) for key in typedic])

426 local_orders = []

427 for index, fingerprints in individual_fingerprints.items():

428 local_order = 0

429 for unique_type, fingerprint in fingerprints.items():

430 term = np.linalg.norm(fingerprint)**2

431 term *= self.binwidth

432 term *= (volume * 1. / n_tot)**3

433 term *= len(typedic[unique_type]) * 1. / n_tot

434 local_order += term

435 local_orders.append(np.sqrt(local_order))

437 return local_orders

439 def get_local_orders(self, a):

440 """ Returns the local orders of all the atoms."""

442 a_top = a[-self.n_top:]

443 key = 'individual_fingerprints'

445 if key in a.info and not self.recalculate:

446 fp, typedic = self._json_decode(*a.info[key])

447 else:

448 fp, typedic = self._take_fingerprints(a_top, individual=True)

449 a.info[key] = self._json_encode(fp, typedic)

451 volume, pmin, pmax, qmin, qmax = self._get_volume(a_top)

452 return self._calculate_local_orders(fp, typedic, volume)

454 def _cosine_distance(self, fp1, fp2, typedic):

455 """ Returns the cosine distance from two fingerprints.

456 It also needs information about the number of atoms from

457 each element, which is included in "typedic"."""

459 keys = sorted(fp1)

461 # calculating the weights:

462 w = {}

463 wtot = 0

464 for key in keys:

465 weight = len(typedic[key[0]]) * len(typedic[key[1]])

466 wtot += weight

467 w[key] = weight

468 for key in keys:

469 w[key] *= 1. / wtot

471 # calculating the fingerprint norms:

472 norm1 = 0

473 norm2 = 0

474 for key in keys:

475 norm1 += (np.linalg.norm(fp1[key])**2) * w[key]

476 norm2 += (np.linalg.norm(fp2[key])**2) * w[key]

477 norm1 = np.sqrt(norm1)

478 norm2 = np.sqrt(norm2)

480 # calculating the distance:

481 distance = 0

482 for key in keys:

483 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2)

485 distance = 0.5 * (1 - distance)

486 return distance

488 def plot_fingerprints(self, a, prefix=''):

489 """ Function for quickly plotting all the fingerprints.

490 Prefix = a prefix you want to give to the resulting PNG file."""

491 try:

492 import matplotlib.pyplot as plt

493 except ImportError:

494 Warning("Matplotlib could not be loaded - plotting won't work")

495 raise

497 if 'fingerprints' in a.info and not self.recalculate:

498 fp, typedic = a.info['fingerprints']

499 fp, typedic = self._json_decode(fp, typedic)

500 else:

501 a_top = a[-self.n_top:]

502 fp, typedic = self._take_fingerprints(a_top)

503 a.info['fingerprints'] = self._json_encode(fp, typedic)

505 npts = int(np.ceil(self.rcut * 1. / self.binwidth))

506 x = np.linspace(0, self.rcut, npts, endpoint=False)

508 for key, val in fp.items():

509 plt.plot(x, val)

510 suffix = "_fp_{0}_{1}.png".format(key[0], key[1])

511 plt.savefig(prefix + suffix)

512 plt.clf()

514 def plot_individual_fingerprints(self, a, prefix=''):

515 """ Function for plotting all the individual fingerprints.

516 Prefix = a prefix for the resulting PNG file."""

517 try:

518 import matplotlib.pyplot as plt

519 except ImportError:

520 Warning("Matplotlib could not be loaded - plotting won't work")

521 raise

523 if 'individual_fingerprints' in a.info and not self.recalculate:

524 fp, typedic = a.info['individual_fingerprints']

525 else:

526 a_top = a[-self.n_top:]

527 fp, typedic = self._take_fingerprints(a_top, individual=True)

528 a.info['individual_fingerprints'] = [fp, typedic]

530 npts = int(np.ceil(self.rcut * 1. / self.binwidth))

531 x = np.linspace(0, self.rcut, npts, endpoint=False)

533 for key, val in fp.items():

534 for key2, val2 in val.items():

535 plt.plot(x, val2)

536 plt.ylim([-1, 10])

537 suffix = "_individual_fp_{0}_{1}.png".format(key, key2)

538 plt.savefig(prefix + suffix)

539 plt.clf()