Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Implementation of the cut-and-splice paring operator.""" 

2import numpy as np 

3from ase import Atoms 

4from ase.geometry import find_mic 

5from ase.ga.utilities import (atoms_too_close, atoms_too_close_two_sets, 

6 gather_atoms_by_tag) 

7from ase.ga.offspring_creator import OffspringCreator 

8 

9 

10class Positions: 

11 """Helper object to simplify the pairing process. 

12 

13 Parameters: 

14 

15 scaled_positions: (Nx3) array 

16 Positions in scaled coordinates 

17 cop: (1x3) array 

18 Center-of-positions (also in scaled coordinates) 

19 symbols: str 

20 String with the atomic symbols 

21 distance: float 

22 Signed distance to the cutting plane 

23 origin: int (0 or 1) 

24 Determines at which side of the plane the position should be. 

25 """ 

26 

27 def __init__(self, scaled_positions, cop, symbols, distance, origin): 

28 self.scaled_positions = scaled_positions 

29 self.cop = cop 

30 self.symbols = symbols 

31 self.distance = distance 

32 self.origin = origin 

33 

34 def to_use(self): 

35 """Tells whether this position is at the right side.""" 

36 if self.distance > 0. and self.origin == 0: 

37 return True 

38 elif self.distance < 0. and self.origin == 1: 

39 return True 

40 else: 

41 return False 

42 

43 

44class CutAndSplicePairing(OffspringCreator): 

45 """The Cut and Splice operator by Deaven and Ho. 

46 

47 Creates offspring from two parent structures using 

48 a randomly generated cutting plane. 

49 

50 The parents may have different unit cells, in which 

51 case the offspring unit cell will be a random combination 

52 of the parent cells. 

53 

54 The basic implementation (for fixed unit cells) is 

55 described in: 

56 

57 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012) 

58 <10.1103/PhysRevLett.108.126101`> 

59 

60 The extension to variable unit cells is similar to: 

61 

62 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720 

63 <10.1016/j.cpc.2006.07.020>` 

64 

65 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387 

66 <10.1016/j.cpc.2010.07.048>` 

67 

68 The operator can furthermore preserve molecular identity 

69 if desired (see the *use_tags* kwarg). Atoms with the same 

70 tag will then be considered as belonging to the same molecule, 

71 and their internal geometry will not be changed by the operator. 

72 

73 If use_tags is enabled, the operator will also conserve the 

74 number of molecules of each kind (in addition to conserving 

75 the overall stoichiometry). Currently, molecules are considered 

76 to be of the same kind if their chemical symbol strings are 

77 identical. In rare cases where this may not be sufficient 

78 (i.e. when desiring to keep the same ratio of isomers), the 

79 different isomers can be differentiated by giving them different 

80 elemental orderings (e.g. 'XY2' and 'Y2X'). 

81 

82 Parameters: 

83 

84 slab: Atoms object 

85 Specifies the cell vectors and periodic boundary conditions 

86 to be applied to the randomly generated structures. 

87 Any included atoms (e.g. representing an underlying slab) 

88 are copied to these new structures. 

89 

90 n_top: int 

91 The number of atoms to optimize 

92 

93 blmin: dict 

94 Dictionary with minimal interatomic distances. 

95 Note: when preserving molecular identity (see use_tags), 

96 the blmin dict will (naturally) only be applied 

97 to intermolecular distances (not the intramolecular 

98 ones). 

99 

100 number_of_variable_cell_vectors: int (default 0) 

101 The number of variable cell vectors (0, 1, 2 or 3). 

102 To keep things simple, it is the 'first' vectors which 

103 will be treated as variable, i.e. the 'a' vector in the 

104 univariate case, the 'a' and 'b' vectors in the bivariate 

105 case, etc. 

106 

107 p1: float or int between 0 and 1 

108 Probability that a parent is shifted over a random 

109 distance along the normal of the cutting plane 

110 (only operative if number_of_variable_cell_vectors > 0). 

111 

112 p2: float or int between 0 and 1 

113 Same as p1, but for shifting along the directions 

114 in the cutting plane (only operative if 

115 number_of_variable_cell_vectors > 0). 

116 

117 minfrac: float between 0 and 1, or None (default) 

118 Minimal fraction of atoms a parent must contribute 

119 to the child. If None, each parent must contribute 

120 at least one atom. 

121 

122 cellbounds: ase.ga.utilities.CellBounds instance 

123 Describing limits on the cell shape, see 

124 :class:`~ase.ga.utilities.CellBounds`. 

125 Note that it only make sense to impose conditions 

126 regarding cell vectors which have been marked as 

127 variable (see number_of_variable_cell_vectors). 

128 

129 use_tags: bool 

130 Whether to use the atomic tags to preserve 

131 molecular identity. 

132 

133 test_dist_to_slab: bool (default True) 

134 Whether to make sure that the distances between 

135 the atoms and the slab satisfy the blmin. 

136 

137 rng: Random number generator 

138 By default numpy.random. 

139 """ 

140 

141 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0, 

142 p1=1, p2=0.05, minfrac=None, cellbounds=None, 

143 test_dist_to_slab=True, use_tags=False, rng=np.random, 

144 verbose=False): 

145 

146 OffspringCreator.__init__(self, verbose, rng=rng) 

147 self.slab = slab 

148 self.n_top = n_top 

149 self.blmin = blmin 

150 assert number_of_variable_cell_vectors in range(4) 

151 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

152 self.p1 = p1 

153 self.p2 = p2 

154 self.minfrac = minfrac 

155 self.cellbounds = cellbounds 

156 self.test_dist_to_slab = test_dist_to_slab 

157 self.use_tags = use_tags 

158 

159 self.scaling_volume = None 

160 self.descriptor = 'CutAndSplicePairing' 

161 self.min_inputs = 2 

162 

163 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0): 

164 """Updates the scaling volume that is used in the pairing 

165 

166 w_adapt: weight of the new vs the old scaling volume 

167 n_adapt: number of best candidates in the population that 

168 are used to calculate the new scaling volume 

169 """ 

170 if not n_adapt: 

171 # take best 20% of the population 

172 n_adapt = int(np.ceil(0.2 * len(population))) 

173 v_new = np.mean([a.get_volume() for a in population[:n_adapt]]) 

174 

175 if not self.scaling_volume: 

176 self.scaling_volume = v_new 

177 else: 

178 volumes = [self.scaling_volume, v_new] 

179 weights = [1 - w_adapt, w_adapt] 

180 self.scaling_volume = np.average(volumes, weights=weights) 

181 

182 def get_new_individual(self, parents): 

183 """The method called by the user that 

184 returns the paired structure.""" 

185 f, m = parents 

186 

187 indi = self.cross(f, m) 

188 desc = 'pairing: {0} {1}'.format(f.info['confid'], 

189 m.info['confid']) 

190 # It is ok for an operator to return None 

191 # It means that it could not make a legal offspring 

192 # within a reasonable amount of time 

193 if indi is None: 

194 return indi, desc 

195 indi = self.initialize_individual(f, indi) 

196 indi.info['data']['parents'] = [f.info['confid'], 

197 m.info['confid']] 

198 

199 return self.finalize_individual(indi), desc 

200 

201 def cross(self, a1, a2): 

202 """Crosses the two atoms objects and returns one""" 

203 

204 if len(a1) != len(self.slab) + self.n_top: 

205 raise ValueError('Wrong size of structure to optimize') 

206 if len(a1) != len(a2): 

207 raise ValueError('The two structures do not have the same length') 

208 

209 N = self.n_top 

210 

211 # Only consider the atoms to optimize 

212 a1 = a1[len(a1) - N: len(a1)] 

213 a2 = a2[len(a2) - N: len(a2)] 

214 

215 if not np.array_equal(a1.numbers, a2.numbers): 

216 err = 'Trying to pair two structures with different stoichiometry' 

217 raise ValueError(err) 

218 

219 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()): 

220 err = 'Trying to pair two structures with different tags' 

221 raise ValueError(err) 

222 

223 cell1 = a1.get_cell() 

224 cell2 = a2.get_cell() 

225 for i in range(self.number_of_variable_cell_vectors, 3): 

226 err = 'Unit cells are supposed to be identical in direction %d' 

227 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2) 

228 

229 invalid = True 

230 counter = 0 

231 maxcount = 1000 

232 a1_copy = a1.copy() 

233 a2_copy = a2.copy() 

234 

235 # Run until a valid pairing is made or maxcount pairings are tested. 

236 while invalid and counter < maxcount: 

237 counter += 1 

238 

239 newcell = self.generate_unit_cell(cell1, cell2) 

240 if newcell is None: 

241 # No valid unit cell could be generated. 

242 # This strongly suggests that it is near-impossible 

243 # to generate one from these parent cells and it is 

244 # better to abort now. 

245 break 

246 

247 # Choose direction of cutting plane normal 

248 if self.number_of_variable_cell_vectors == 0: 

249 # Will be generated entirely at random 

250 theta = np.pi * self.rng.random() 

251 phi = 2. * np.pi * self.rng.random() 

252 cut_n = np.array([np.cos(phi) * np.sin(theta), 

253 np.sin(phi) * np.sin(theta), np.cos(theta)]) 

254 else: 

255 # Pick one of the 'variable' cell vectors 

256 cut_n = self.rng.choice(self.number_of_variable_cell_vectors) 

257 

258 # Randomly translate parent structures 

259 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]): 

260 a_copy.set_positions(a.get_positions()) 

261 

262 cell = a_copy.get_cell() 

263 for i in range(self.number_of_variable_cell_vectors): 

264 r = self.rng.random() 

265 cond1 = i == cut_n and r < self.p1 

266 cond2 = i != cut_n and r < self.p2 

267 if cond1 or cond2: 

268 a_copy.positions += self.rng.random() * cell[i] 

269 

270 if self.use_tags: 

271 # For correct determination of the center- 

272 # of-position of the multi-atom blocks, 

273 # we need to group their constituent atoms 

274 # together 

275 gather_atoms_by_tag(a_copy) 

276 else: 

277 a_copy.wrap() 

278 

279 # Generate the cutting point in scaled coordinates 

280 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0) 

281 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0) 

282 cut_p = np.zeros((1, 3)) 

283 for i in range(3): 

284 if i < self.number_of_variable_cell_vectors: 

285 cut_p[0, i] = self.rng.random() 

286 else: 

287 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i]) 

288 

289 # Perform the pairing: 

290 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell) 

291 if child is None: 

292 continue 

293 

294 # Verify whether the atoms are too close or not: 

295 if atoms_too_close(child, self.blmin, use_tags=self.use_tags): 

296 continue 

297 

298 if self.test_dist_to_slab and len(self.slab) > 0: 

299 if atoms_too_close_two_sets(self.slab, child, self.blmin): 

300 continue 

301 

302 # Passed all the tests 

303 child = self.slab + child 

304 child.set_cell(newcell, scale_atoms=False) 

305 child.wrap() 

306 return child 

307 

308 return None 

309 

310 def generate_unit_cell(self, cell1, cell2, maxcount=10000): 

311 """Generates a new unit cell by a random linear combination 

312 of the parent cells. The new cell must satisfy the 

313 self.cellbounds constraints. Returns None if no such cell 

314 was generated within a given number of attempts. 

315 

316 Parameters: 

317 

318 maxcount: int 

319 The maximal number of attempts. 

320 """ 

321 # First calculate the scaling volume 

322 if not self.scaling_volume: 

323 v1 = np.abs(np.linalg.det(cell1)) 

324 v2 = np.abs(np.linalg.det(cell2)) 

325 r = self.rng.random() 

326 v_ref = r * v1 + (1 - r) * v2 

327 else: 

328 v_ref = self.scaling_volume 

329 

330 # Now the cell vectors 

331 if self.number_of_variable_cell_vectors == 0: 

332 assert np.allclose(cell1, cell2), 'Parent cells are not the same' 

333 newcell = np.copy(cell1) 

334 else: 

335 count = 0 

336 while count < maxcount: 

337 r = self.rng.random() 

338 newcell = r * cell1 + (1 - r) * cell2 

339 

340 vol = abs(np.linalg.det(newcell)) 

341 scaling = v_ref / vol 

342 scaling **= 1. / self.number_of_variable_cell_vectors 

343 newcell[:self.number_of_variable_cell_vectors] *= scaling 

344 

345 found = True 

346 if self.cellbounds is not None: 

347 found = self.cellbounds.is_within_bounds(newcell) 

348 if found: 

349 break 

350 

351 count += 1 

352 else: 

353 # Did not find acceptable cell 

354 newcell = None 

355 

356 return newcell 

357 

358 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell): 

359 """Creates a child from two parents using the given cut. 

360 

361 Returns None if the generated structure does not contain 

362 a large enough fraction of each parent (see self.minfrac). 

363 

364 Does not check whether atoms are too close. 

365 

366 Assumes the 'slab' parts have been removed from the parent 

367 structures and that these have been checked for equal 

368 lengths, stoichiometries, and tags (if self.use_tags). 

369 

370 Parameters: 

371 

372 cutting_normal: int or (1x3) array 

373 

374 cutting_point: (1x3) array 

375 In fractional coordinates 

376 

377 cell: (3x3) array 

378 The unit cell for the child structure 

379 """ 

380 symbols = a1.get_chemical_symbols() 

381 tags = a1.get_tags() if self.use_tags else np.arange(len(a1)) 

382 

383 # Generate list of all atoms / atom groups: 

384 p1, p2, sym = [], [], [] 

385 for i in np.unique(tags): 

386 indices = np.where(tags == i)[0] 

387 s = ''.join([symbols[j] for j in indices]) 

388 sym.append(s) 

389 

390 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])): 

391 c = a.get_cell() 

392 cop = np.mean(a.positions[indices], axis=0) 

393 cut_p = np.dot(cutting_point, c) 

394 if isinstance(cutting_normal, int): 

395 vecs = [c[j] for j in range(3) if j != cutting_normal] 

396 cut_n = np.cross(vecs[0], vecs[1]) 

397 else: 

398 cut_n = np.dot(cutting_normal, c) 

399 d = np.dot(cop - cut_p, cut_n) 

400 spos = a.get_scaled_positions()[indices] 

401 scop = np.mean(spos, axis=0) 

402 p.append(Positions(spos, scop, s, d, i)) 

403 

404 all_points = p1 + p2 

405 unique_sym = np.unique(sym) 

406 types = {s: sym.count(s) for s in unique_sym} 

407 

408 # Sort these by chemical symbols: 

409 all_points.sort(key=lambda x: x.symbols, reverse=True) 

410 

411 # For each atom type make the pairing 

412 unique_sym.sort() 

413 use_total = dict() 

414 for s in unique_sym: 

415 used = [] 

416 not_used = [] 

417 # The list is looked trough in 

418 # reverse order so atoms can be removed 

419 # from the list along the way. 

420 for i in reversed(range(len(all_points))): 

421 # If there are no more atoms of this type 

422 if all_points[i].symbols != s: 

423 break 

424 # Check if the atom should be included 

425 if all_points[i].to_use(): 

426 used.append(all_points.pop(i)) 

427 else: 

428 not_used.append(all_points.pop(i)) 

429 

430 assert len(used) + len(not_used) == types[s] * 2 

431 

432 # While we have too few of the given atom type 

433 while len(used) < types[s]: 

434 index = self.rng.randint(len(not_used)) 

435 used.append(not_used.pop(index)) 

436 

437 # While we have too many of the given atom type 

438 while len(used) > types[s]: 

439 # remove randomly: 

440 index = self.rng.randint(len(used)) 

441 not_used.append(used.pop(index)) 

442 

443 use_total[s] = used 

444 

445 n_tot = sum([len(ll) for ll in use_total.values()]) 

446 assert n_tot == len(sym) 

447 

448 # check if the generated structure contains 

449 # atoms from both parents: 

450 count1, count2, N = 0, 0, len(a1) 

451 for x in use_total.values(): 

452 count1 += sum([y.origin == 0 for y in x]) 

453 count2 += sum([y.origin == 1 for y in x]) 

454 

455 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N)) 

456 if count1 < nmin or count2 < nmin: 

457 return None 

458 

459 # Construct the cartesian positions and reorder the atoms 

460 # to follow the original order 

461 newpos = [] 

462 pbc = a1.get_pbc() 

463 for s in sym: 

464 p = use_total[s].pop() 

465 c = a1.get_cell() if p.origin == 0 else a2.get_cell() 

466 pos = np.dot(p.scaled_positions, c) 

467 cop = np.dot(p.cop, c) 

468 vectors, lengths = find_mic(pos - cop, c, pbc) 

469 newcop = np.dot(p.cop, cell) 

470 pos = newcop + vectors 

471 for row in pos: 

472 newpos.append(row) 

473 

474 newpos = np.reshape(newpos, (N, 3)) 

475 num = a1.get_atomic_numbers() 

476 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell, 

477 tags=tags) 

478 child.wrap() 

479 return child