Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import numpy as np 

2from itertools import combinations_with_replacement 

3from math import erf 

4from scipy.spatial.distance import cdist 

5from ase.neighborlist import NeighborList 

6from ase.utils import pbc2pbc 

7 

8 

9class OFPComparator: 

10 """Implementation of comparison using Oganov's fingerprint (OFP) 

11 functions, based on: 

12 

13 * :doi:`Oganov, Valle, J. Chem. Phys. 130, 104504 (2009) 

14 <10.1063/1.3079326>` 

15 

16 * :doi:`Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632 

17 <10.1016/j.cpc.2010.06.007>` 

18 

19 Parameters: 

20 

21 n_top: int or None 

22 The number of atoms to optimize (None = include all). 

23 

24 dE: float 

25 Energy difference above which two structures are 

26 automatically considered to be different. (Default 1 eV) 

27 

28 cos_dist_max: float 

29 Maximal cosine distance between two structures in 

30 order to be still considered the same structure. Default 5e-3 

31 

32 rcut: float 

33 Cutoff radius in Angstrom for the fingerprints. 

34 (Default 20 Angstrom) 

35 

36 binwidth: float 

37 Width in Angstrom of the bins over which the fingerprints 

38 are discretized. (Default 0.05 Angstrom) 

39 

40 pbc: list of three booleans or None 

41 Specifies whether to apply periodic boundary conditions 

42 along each of the three unit cell vectors when calculating 

43 the fingerprint. The default (None) is to apply PBCs in all 

44 3 directions. 

45 

46 Note: for isolated systems (pbc = [False, False, False]), 

47 the pair correlation function itself is always short-ranged 

48 (decays to zero beyond a certain radius), so unity is not 

49 subtracted for calculating the fingerprint. Also the 

50 volume normalization disappears. 

51 

52 maxdims: list of three floats or None 

53 If PBCs in only 1 or 2 dimensions are specified, the 

54 maximal thicknesses along the non-periodic directions can 

55 be specified here (the values given for the periodic 

56 directions will not be used). If set to None (the 

57 default), the length of the cell vector along the 

58 non-periodic direction is used. 

59 

60 Note: in this implementation, the cell vectors are 

61 assumed to be orthogonal. 

62 

63 sigma: float 

64 Standard deviation of the gaussian smearing to be applied 

65 in the calculation of the fingerprints (in 

66 Angstrom). Default 0.02 Angstrom. 

67 

68 nsigma: int 

69 Distance (as the number of standard deviations sigma) at 

70 which the gaussian smearing is cut off (i.e. no smearing 

71 beyond that distance). (Default 4) 

72 

73 recalculate: boolean 

74 If True, ignores the fingerprints stored in 

75 atoms.info and recalculates them. (Default False) 

76 

77 """ 

78 

79 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20., 

80 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True, 

81 maxdims=None, recalculate=False): 

82 self.n_top = n_top or 0 

83 self.dE = dE 

84 self.cos_dist_max = cos_dist_max 

85 self.rcut = rcut 

86 self.binwidth = binwidth 

87 self.pbc = pbc2pbc(pbc) 

88 

89 if maxdims is None: 

90 self.maxdims = [None] * 3 

91 else: 

92 self.maxdims = maxdims 

93 

94 self.sigma = sigma 

95 self.nsigma = nsigma 

96 self.recalculate = recalculate 

97 self.dimensions = self.pbc.sum() 

98 

99 if self.dimensions == 1 or self.dimensions == 2: 

100 for direction in range(3): 

101 if not self.pbc[direction]: 

102 if self.maxdims[direction] is not None: 

103 if self.maxdims[direction] <= 0: 

104 e = '''If a max thickness is specificed in maxdims 

105 for a non-periodic direction, it has to be 

106 strictly positive.''' 

107 raise ValueError(e) 

108 

109 def looks_like(self, a1, a2): 

110 """ Return if structure a1 or a2 are similar or not. """ 

111 if len(a1) != len(a2): 

112 raise Exception('The two configurations are not the same size.') 

113 

114 # first we check the energy criteria 

115 if a1.calc is not None and a2.calc is not None: 

116 dE = abs(a1.get_potential_energy() - a2.get_potential_energy()) 

117 if dE >= self.dE: 

118 return False 

119 

120 # then we check the structure 

121 cos_dist = self._compare_structure(a1, a2) 

122 verdict = cos_dist < self.cos_dist_max 

123 return verdict 

124 

125 def _json_encode(self, fingerprints, typedic): 

126 """ json does not accept tuples nor integers as dict keys, 

127 so in order to write the fingerprints to atoms.info, we need 

128 to convert them to strings """ 

129 fingerprints_encoded = {} 

130 for key, val in fingerprints.items(): 

131 try: 

132 newkey = "_".join(map(str, list(key))) 

133 except TypeError: 

134 newkey = str(key) 

135 if isinstance(val, dict): 

136 fingerprints_encoded[newkey] = {} 

137 for key2, val2 in val.items(): 

138 fingerprints_encoded[newkey][str(key2)] = val2 

139 else: 

140 fingerprints_encoded[newkey] = val 

141 typedic_encoded = {} 

142 for key, val in typedic.items(): 

143 newkey = str(key) 

144 typedic_encoded[newkey] = val 

145 return [fingerprints_encoded, typedic_encoded] 

146 

147 def _json_decode(self, fingerprints, typedic): 

148 """ This is the reverse operation of _json_encode """ 

149 fingerprints_decoded = {} 

150 for key, val in fingerprints.items(): 

151 newkey = list(map(int, key.split("_"))) 

152 if len(newkey) > 1: 

153 newkey = tuple(newkey) 

154 else: 

155 newkey = newkey[0] 

156 

157 if isinstance(val, dict): 

158 fingerprints_decoded[newkey] = {} 

159 for key2, val2 in val.items(): 

160 fingerprints_decoded[newkey][int(key2)] = np.array(val2) 

161 else: 

162 fingerprints_decoded[newkey] = np.array(val) 

163 typedic_decoded = {} 

164 for key, val in typedic.items(): 

165 newkey = int(key) 

166 typedic_decoded[newkey] = val 

167 return [fingerprints_decoded, typedic_decoded] 

168 

169 def _compare_structure(self, a1, a2): 

170 """ Returns the cosine distance between the two structures, 

171 using their fingerprints. """ 

172 

173 if len(a1) != len(a2): 

174 raise Exception('The two configurations are not the same size.') 

175 

176 a1top = a1[-self.n_top:] 

177 a2top = a2[-self.n_top:] 

178 

179 if 'fingerprints' in a1.info and not self.recalculate: 

180 fp1, typedic1 = a1.info['fingerprints'] 

181 fp1, typedic1 = self._json_decode(fp1, typedic1) 

182 else: 

183 fp1, typedic1 = self._take_fingerprints(a1top) 

184 a1.info['fingerprints'] = self._json_encode(fp1, typedic1) 

185 

186 if 'fingerprints' in a2.info and not self.recalculate: 

187 fp2, typedic2 = a2.info['fingerprints'] 

188 fp2, typedic2 = self._json_decode(fp2, typedic2) 

189 else: 

190 fp2, typedic2 = self._take_fingerprints(a2top) 

191 a2.info['fingerprints'] = self._json_encode(fp2, typedic2) 

192 

193 if sorted(fp1) != sorted(fp2): 

194 raise AssertionError('The two structures have fingerprints ' 

195 'with different compounds.') 

196 for key in typedic1: 

197 if not np.array_equal(typedic1[key], typedic2[key]): 

198 raise AssertionError('The two structures have a different ' 

199 'stoichiometry or ordering!') 

200 

201 cos_dist = self._cosine_distance(fp1, fp2, typedic1) 

202 return cos_dist 

203 

204 def _get_volume(self, a): 

205 ''' Calculates the normalizing value, and other parameters 

206 (pmin,pmax,qmin,qmax) that are used for surface area calculation 

207 in the case of 1 or 2-D periodicity.''' 

208 

209 cell = a.get_cell() 

210 scalpos = a.get_scaled_positions() 

211 

212 # defaults: 

213 volume = 1. 

214 pmin, pmax, qmin, qmax = [0.] * 4 

215 

216 if self.dimensions == 1 or self.dimensions == 2: 

217 for direction in range(3): 

218 if not self.pbc[direction]: 

219 if self.maxdims[direction] is None: 

220 maxdim = np.linalg.norm(cell[direction, :]) 

221 self.maxdims[direction] = maxdim 

222 

223 pbc_dirs = [i for i in range(3) if self.pbc[i]] 

224 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

225 

226 if self.dimensions == 3: 

227 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :])) 

228 

229 elif self.dimensions == 2: 

230 non_pbc_dir = non_pbc_dirs[0] 

231 

232 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :]) 

233 b = self.maxdims[non_pbc_dir] 

234 b /= np.linalg.norm(cell[non_pbc_dir, :]) 

235 

236 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :])) 

237 

238 maxpos = np.max(scalpos[:, non_pbc_dir]) 

239 minpos = np.min(scalpos[:, non_pbc_dir]) 

240 pwidth = maxpos - minpos 

241 pmargin = 0.5 * (b - pwidth) 

242 # note: here is a place where we assume that the 

243 # non-periodic direction is orthogonal to the periodic ones: 

244 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin 

245 pmin *= np.linalg.norm(cell[non_pbc_dir, :]) 

246 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin 

247 pmax *= np.linalg.norm(cell[non_pbc_dir, :]) 

248 

249 elif self.dimensions == 1: 

250 pbc_dir = pbc_dirs[0] 

251 

252 v0 = cell[non_pbc_dirs[0], :] 

253 b0 = self.maxdims[non_pbc_dirs[0]] 

254 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

255 v1 = cell[non_pbc_dirs[1], :] 

256 b1 = self.maxdims[non_pbc_dirs[1]] 

257 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

258 

259 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1), 

260 cell[pbc_dir, :])) 

261 

262 # note: here is a place where we assume that the 

263 # non-periodic direction is orthogonal to the periodic ones: 

264 maxpos = np.max(scalpos[:, non_pbc_dirs[0]]) 

265 minpos = np.min(scalpos[:, non_pbc_dirs[0]]) 

266 pwidth = maxpos - minpos 

267 pmargin = 0.5 * (b0 - pwidth) 

268 

269 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin 

270 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

271 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin 

272 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

273 

274 maxpos = np.max(scalpos[:, non_pbc_dirs[1]]) 

275 minpos = np.min(scalpos[:, non_pbc_dirs[1]]) 

276 qwidth = maxpos - minpos 

277 qmargin = 0.5 * (b1 - qwidth) 

278 

279 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin 

280 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

281 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin 

282 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

283 

284 elif self.dimensions == 0: 

285 volume = 1. 

286 

287 return [volume, pmin, pmax, qmin, qmax] 

288 

289 def _take_fingerprints(self, atoms, individual=False): 

290 """ Returns a [fingerprints,typedic] list, where fingerprints 

291 is a dictionary with the fingerprints, and typedic is a 

292 dictionary with the list of atom indices for each element 

293 (or "type") in the atoms object. 

294 The keys in the fingerprints dictionary are the (A,B) tuples, 

295 which are the different element-element combinations in the 

296 atoms object (A and B are the atomic numbers). 

297 When A != B, the (A,B) tuple is sorted (A < B). 

298 

299 If individual=True, a dict is returned, where each atom index 

300 has an {atomic_number:fingerprint} dict as value. 

301 If individual=False, the fingerprints from atoms of the same 

302 atomic number are added together.""" 

303 

304 pos = atoms.get_positions() 

305 num = atoms.get_atomic_numbers() 

306 cell = atoms.get_cell() 

307 

308 unique_types = np.unique(num) 

309 posdic = {} 

310 typedic = {} 

311 for t in unique_types: 

312 tlist = [i for i, atom in enumerate(atoms) if atom.number == t] 

313 typedic[t] = tlist 

314 posdic[t] = pos[tlist] 

315 

316 # determining the volume normalization and other parameters 

317 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms) 

318 

319 # functions for calculating the surface area 

320 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

321 

322 def surface_area_0d(r): 

323 return 4 * np.pi * (r**2) 

324 

325 def surface_area_1d(r, pos): 

326 q0 = pos[non_pbc_dirs[1]] 

327 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real 

328 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real 

329 factor = 1 - (phi1 + phi2) / np.pi 

330 return surface_area_2d(r, pos) * factor 

331 

332 def surface_area_2d(r, pos): 

333 p0 = pos[non_pbc_dirs[0]] 

334 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r) 

335 area *= 2 * np.pi * r 

336 return area 

337 

338 def surface_area_3d(r): 

339 return 4 * np.pi * (r**2) 

340 

341 # build neighborlist 

342 # this is computationally the most intensive part 

343 a = atoms.copy() 

344 a.set_pbc(self.pbc) 

345 nl = NeighborList([self.rcut / 2.] * len(a), skin=0., 

346 self_interaction=False, bothways=True) 

347 nl.update(a) 

348 

349 # parameters for the binning: 

350 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth)) 

351 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma 

352 smearing_norm = erf(x) 

353 nbins = int(np.ceil(self.rcut * 1. / self.binwidth)) 

354 bindist = self.binwidth * np.arange(1, nbins + 1) 

355 

356 def take_individual_rdf(index, unique_type): 

357 # Computes the radial distribution function of atoms 

358 # of type unique_type around the atom with index "index". 

359 rdf = np.zeros(nbins) 

360 

361 if self.dimensions == 3: 

362 weights = 1. / surface_area_3d(bindist) 

363 elif self.dimensions == 2: 

364 weights = 1. / surface_area_2d(bindist, pos[index]) 

365 elif self.dimensions == 1: 

366 weights = 1. / surface_area_1d(bindist, pos[index]) 

367 elif self.dimensions == 0: 

368 weights = 1. / surface_area_0d(bindist) 

369 weights /= self.binwidth 

370 

371 indices, offsets = nl.get_neighbors(index) 

372 valid = np.where(num[indices] == unique_type) 

373 p = pos[indices[valid]] + np.dot(offsets[valid], cell) 

374 r = cdist(p, [pos[index]]) 

375 bins = np.floor(r / self.binwidth) 

376 

377 for i in range(-m, m + 1): 

378 newbins = bins + i 

379 valid = np.where((newbins >= 0) & (newbins < nbins)) 

380 valid_bins = newbins[valid].astype(int) 

381 values = weights[valid_bins] 

382 

383 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma 

384 values *= 0.5 * erf(c * (2 * i + 1)) - \ 

385 0.5 * erf(c * (2 * i - 1)) 

386 values /= smearing_norm 

387 

388 for j, valid_bin in enumerate(valid_bins): 

389 rdf[valid_bin] += values[j] 

390 

391 rdf /= len(typedic[unique_type]) * 1. / volume 

392 return rdf 

393 

394 fingerprints = {} 

395 if individual: 

396 for i in range(len(atoms)): 

397 fingerprints[i] = {} 

398 for unique_type in unique_types: 

399 fingerprint = take_individual_rdf(i, unique_type) 

400 if self.dimensions > 0: 

401 fingerprint -= 1 

402 fingerprints[i][unique_type] = fingerprint 

403 else: 

404 for t1, t2 in combinations_with_replacement(unique_types, r=2): 

405 key = (t1, t2) 

406 fingerprint = np.zeros(nbins) 

407 for i in typedic[t1]: 

408 fingerprint += take_individual_rdf(i, t2) 

409 fingerprint /= len(typedic[t1]) 

410 if self.dimensions > 0: 

411 fingerprint -= 1 

412 fingerprints[key] = fingerprint 

413 

414 return [fingerprints, typedic] 

415 

416 def _calculate_local_orders(self, individual_fingerprints, typedic, 

417 volume): 

418 """ Returns a list with the local order for every atom, 

419 using the definition of local order from 

420 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632 

421 :doi:`10.1016/j.cpc.2010.06.007`""" 

422 

423 # total number of atoms: 

424 n_tot = sum([len(typedic[key]) for key in typedic]) 

425 

426 local_orders = [] 

427 for index, fingerprints in individual_fingerprints.items(): 

428 local_order = 0 

429 for unique_type, fingerprint in fingerprints.items(): 

430 term = np.linalg.norm(fingerprint)**2 

431 term *= self.binwidth 

432 term *= (volume * 1. / n_tot)**3 

433 term *= len(typedic[unique_type]) * 1. / n_tot 

434 local_order += term 

435 local_orders.append(np.sqrt(local_order)) 

436 

437 return local_orders 

438 

439 def get_local_orders(self, a): 

440 """ Returns the local orders of all the atoms.""" 

441 

442 a_top = a[-self.n_top:] 

443 key = 'individual_fingerprints' 

444 

445 if key in a.info and not self.recalculate: 

446 fp, typedic = self._json_decode(*a.info[key]) 

447 else: 

448 fp, typedic = self._take_fingerprints(a_top, individual=True) 

449 a.info[key] = self._json_encode(fp, typedic) 

450 

451 volume, pmin, pmax, qmin, qmax = self._get_volume(a_top) 

452 return self._calculate_local_orders(fp, typedic, volume) 

453 

454 def _cosine_distance(self, fp1, fp2, typedic): 

455 """ Returns the cosine distance from two fingerprints. 

456 It also needs information about the number of atoms from 

457 each element, which is included in "typedic".""" 

458 

459 keys = sorted(fp1) 

460 

461 # calculating the weights: 

462 w = {} 

463 wtot = 0 

464 for key in keys: 

465 weight = len(typedic[key[0]]) * len(typedic[key[1]]) 

466 wtot += weight 

467 w[key] = weight 

468 for key in keys: 

469 w[key] *= 1. / wtot 

470 

471 # calculating the fingerprint norms: 

472 norm1 = 0 

473 norm2 = 0 

474 for key in keys: 

475 norm1 += (np.linalg.norm(fp1[key])**2) * w[key] 

476 norm2 += (np.linalg.norm(fp2[key])**2) * w[key] 

477 norm1 = np.sqrt(norm1) 

478 norm2 = np.sqrt(norm2) 

479 

480 # calculating the distance: 

481 distance = 0 

482 for key in keys: 

483 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2) 

484 

485 distance = 0.5 * (1 - distance) 

486 return distance 

487 

488 def plot_fingerprints(self, a, prefix=''): 

489 """ Function for quickly plotting all the fingerprints. 

490 Prefix = a prefix you want to give to the resulting PNG file.""" 

491 try: 

492 import matplotlib.pyplot as plt 

493 except ImportError: 

494 Warning("Matplotlib could not be loaded - plotting won't work") 

495 raise 

496 

497 if 'fingerprints' in a.info and not self.recalculate: 

498 fp, typedic = a.info['fingerprints'] 

499 fp, typedic = self._json_decode(fp, typedic) 

500 else: 

501 a_top = a[-self.n_top:] 

502 fp, typedic = self._take_fingerprints(a_top) 

503 a.info['fingerprints'] = self._json_encode(fp, typedic) 

504 

505 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

506 x = np.linspace(0, self.rcut, npts, endpoint=False) 

507 

508 for key, val in fp.items(): 

509 plt.plot(x, val) 

510 suffix = "_fp_{0}_{1}.png".format(key[0], key[1]) 

511 plt.savefig(prefix + suffix) 

512 plt.clf() 

513 

514 def plot_individual_fingerprints(self, a, prefix=''): 

515 """ Function for plotting all the individual fingerprints. 

516 Prefix = a prefix for the resulting PNG file.""" 

517 try: 

518 import matplotlib.pyplot as plt 

519 except ImportError: 

520 Warning("Matplotlib could not be loaded - plotting won't work") 

521 raise 

522 

523 if 'individual_fingerprints' in a.info and not self.recalculate: 

524 fp, typedic = a.info['individual_fingerprints'] 

525 else: 

526 a_top = a[-self.n_top:] 

527 fp, typedic = self._take_fingerprints(a_top, individual=True) 

528 a.info['individual_fingerprints'] = [fp, typedic] 

529 

530 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

531 x = np.linspace(0, self.rcut, npts, endpoint=False) 

532 

533 for key, val in fp.items(): 

534 for key2, val2 in val.items(): 

535 plt.plot(x, val2) 

536 plt.ylim([-1, 10]) 

537 suffix = "_individual_fp_{0}_{1}.png".format(key, key2) 

538 plt.savefig(prefix + suffix) 

539 plt.clf()