Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Tools for generating new random starting candidates.""" 

2import numpy as np 

3from ase import Atoms 

4from ase.data import atomic_numbers 

5from ase.build import molecule 

6from ase.ga.utilities import (closest_distances_generator, atoms_too_close, 

7 atoms_too_close_two_sets) 

8 

9 

10class StartGenerator: 

11 """Class for generating random starting candidates. 

12 

13 Its basic task consists of randomly placing atoms or 

14 molecules within a predescribed box, while respecting 

15 certain minimal interatomic distances. 

16 

17 Depending on the problem at hand, certain box vectors 

18 may not be known or chosen beforehand, and hence also 

19 need to be generated at random. Common cases include 

20 bulk crystals, films and chains, with respectively 

21 3, 2 and 1 unknown cell vectors. 

22 

23 Parameters: 

24 

25 slab: Atoms object 

26 Specifies the cell vectors and periodic boundary conditions 

27 to be applied to the randomly generated structures. 

28 Any included atoms (e.g. representing an underlying slab) 

29 are copied to these new structures. 

30 Variable cell vectors (see number_of_variable_cell_vectors) 

31 will be ignored because these will be generated at random. 

32 

33 blocks: list 

34 List of building units for the structure. Each item can be: 

35 

36 * an integer: representing a single atom by its atomic number, 

37 * a string: for a single atom (a chemical symbol) or a 

38 molecule (name recognized by ase.build.molecule), 

39 * an Atoms object, 

40 * an (A, B) tuple or list where A is any of the above 

41 and B is the number of A units to include. 

42 

43 A few examples: 

44 

45 >>> blocks = ['Ti'] * 4 + ['O'] * 8 

46 >>> blocks = [('Ti', 4), ('O', 8)] 

47 >>> blocks = [('CO2', 3)] # 3 CO2 molecules 

48 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]]) 

49 >>> blocks = [(co, 3)] 

50 

51 Each individual block (single atom or molecule) in the 

52 randomly generated candidates is given a unique integer 

53 tag. These can be used to preserve the molecular identity 

54 of these subunits. 

55 

56 blmin: dict or float 

57 Dictionary with minimal interatomic distances. 

58 If a number is provided instead, the dictionary will 

59 be generated with this ratio of covalent bond radii. 

60 Note: when preserving molecular identity (see use_tags), 

61 the blmin dict will (naturally) only be applied 

62 to intermolecular distances (not the intramolecular 

63 ones). 

64 

65 number_of_variable_cell_vectors: int (default 0) 

66 The number of variable cell vectors (0, 1, 2 or 3). 

67 To keep things simple, it is the 'first' vectors which 

68 will be treated as variable, i.e. the 'a' vector in the 

69 univariate case, the 'a' and 'b' vectors in the bivariate 

70 case, etc. 

71 

72 box_to_place_in: [list, list of lists] (default None) 

73 The box in which the atoms can be placed. 

74 The default (None) means the box is equal to the 

75 entire unit cell of the 'slab' object. 

76 In many cases, however, smaller boxes are desired 

77 (e.g. for adsorbates on a slab surface or for isolated 

78 clusters). Then, box_to_place_in can be set as 

79 [p0, [v1, v2, v3]] with positions being generated as 

80 p0 + r1 * v1 + r2 * v2 + r3 + v3. 

81 In case of one or more variable cell vectors, 

82 the corresponding items in p0/v1/v2/v3 will be ignored. 

83 

84 box_volume: int or float or None (default) 

85 Initial guess for the box volume in cubic Angstrom 

86 (used in generating the variable cell vectors). 

87 Typical values in the solid state are 8-12 A^3 per atom. 

88 If there are no variable cell vectors, the default None 

89 is required (box volume equal to the box_to_place_in 

90 volume). 

91 

92 splits: dict or None 

93 Splitting scheme for increasing the translational symmetry 

94 in the random candidates, based on: 

95 

96 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__ 

97 

98 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007 

99 

100 This should be a dict specifying the relative probabilities 

101 for each split, written as tuples. For example, 

102 

103 >>> splits = {(2,): 3, (1,): 1} 

104 

105 This means that, for each structure, either a splitting 

106 factor of 2 is applied to one randomly chosen axis, 

107 or a splitting factor of 1 is applied (i.e., no splitting). 

108 The probability ratio of the two scenararios will be 3:1, 

109 i.e. 75% chance for the former and 25% chance for the latter 

110 splitting scheme. Only the directions in which the 'slab' 

111 object is periodic are eligible for splitting. 

112 

113 To e.g. always apply splitting factors of 2 and 3 along two 

114 randomly chosen axes: 

115 

116 >>> splits = {(2, 3): 1} 

117 

118 By default, no splitting is applied (splits = None = {(1,): 1}). 

119 

120 cellbounds: ase.ga.utilities.CellBounds instance 

121 Describing limits on the cell shape, see 

122 :class:`~ase.ga.utilities.CellBounds`. 

123 Note that it only make sense to impose conditions 

124 regarding cell vectors which have been marked as 

125 variable (see number_of_variable_cell_vectors). 

126 

127 test_dist_to_slab: bool (default True) 

128 Whether to make sure that the distances between 

129 the atoms and the slab satisfy the blmin. 

130 

131 test_too_far: bool (default True) 

132 Whether to also make sure that there are no isolated 

133 atoms or molecules with nearest-neighbour bond lengths 

134 larger than 2x the value in the blmin dict. 

135 

136 rng: Random number generator 

137 By default numpy.random. 

138 """ 

139 

140 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0, 

141 box_to_place_in=None, box_volume=None, splits=None, 

142 cellbounds=None, test_dist_to_slab=True, test_too_far=True, 

143 rng=np.random): 

144 

145 self.slab = slab 

146 

147 self.blocks = [] 

148 for item in blocks: 

149 if isinstance(item, tuple) or isinstance(item, list): 

150 assert len(item) == 2, 'Item length %d != 2' % len(item) 

151 block, count = item 

152 else: 

153 block, count = item, 1 

154 

155 # Convert block into Atoms object 

156 if isinstance(block, Atoms): 

157 pass 

158 elif block in atomic_numbers: 

159 block = Atoms(block) 

160 elif isinstance(block, str): 

161 block = molecule(block) 

162 elif block in atomic_numbers.values(): 

163 block = Atoms(numbers=[block]) 

164 else: 

165 raise ValueError('Cannot parse this block:', block) 

166 

167 # Add to self.blocks, taking into account that 

168 # we want to group the same blocks together. 

169 # This is important for the cell splitting. 

170 for i, (b, c) in enumerate(self.blocks): 

171 if block == b: 

172 self.blocks[i][1] += count 

173 break 

174 else: 

175 self.blocks.append([block, count]) 

176 

177 if isinstance(blmin, dict): 

178 self.blmin = blmin 

179 else: 

180 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks]) 

181 self.blmin = closest_distances_generator( 

182 numbers, 

183 ratio_of_covalent_radii=blmin) 

184 

185 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

186 assert self.number_of_variable_cell_vectors in range(4) 

187 if len(self.slab) > 0: 

188 msg = 'Including atoms in the slab only makes sense' 

189 msg += ' if there are no variable unit cell vectors' 

190 assert self.number_of_variable_cell_vectors == 0, msg 

191 for i in range(self.number_of_variable_cell_vectors): 

192 msg = 'Unit cell %s-vector is marked as variable ' % ('abc'[i]) 

193 msg += 'and slab must then also be periodic in this direction' 

194 assert self.slab.pbc[i], msg 

195 

196 if box_to_place_in is None: 

197 p0 = np.array([0., 0., 0.]) 

198 cell = self.slab.get_cell() 

199 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]] 

200 else: 

201 self.box_to_place_in = box_to_place_in 

202 

203 if box_volume is None: 

204 assert self.number_of_variable_cell_vectors == 0 

205 box_volume = abs(np.linalg.det(self.box_to_place_in[1])) 

206 else: 

207 assert self.number_of_variable_cell_vectors > 0 

208 self.box_volume = box_volume 

209 assert self.box_volume > 0 

210 

211 if splits is None: 

212 splits = {(1,): 1} 

213 tot = sum([v for v in splits.values()]) # normalization 

214 self.splits = {k: v * 1. / tot for k, v in splits.items()} 

215 

216 self.cellbounds = cellbounds 

217 self.test_too_far = test_too_far 

218 self.test_dist_to_slab = test_dist_to_slab 

219 self.rng = rng 

220 

221 def get_new_candidate(self, maxiter=None): 

222 """Returns a new candidate. 

223 

224 maxiter: upper bound on the total number of times 

225 the random position generator is called 

226 when generating the new candidate. 

227 

228 By default (maxiter=None) no such bound 

229 is imposed. If the generator takes too 

230 long time to create a new candidate, it 

231 may be suitable to specify a finite value. 

232 When the bound is exceeded, None is returned. 

233 """ 

234 pbc = self.slab.get_pbc() 

235 

236 # Choose cell splitting 

237 r = self.rng.random() 

238 cumprob = 0 

239 for split, prob in self.splits.items(): 

240 cumprob += prob 

241 if cumprob > r: 

242 break 

243 

244 # Choose direction(s) along which to split 

245 # and by how much 

246 directions = [i for i in range(3) if pbc[i]] 

247 repeat = [1, 1, 1] 

248 if len(directions) > 0: 

249 for number in split: 

250 d = self.rng.choice(directions) 

251 repeat[d] = number 

252 repeat = tuple(repeat) 

253 

254 # Generate the 'full' unit cell 

255 # for the eventual candidates 

256 cell = self.generate_unit_cell(repeat) 

257 if self.number_of_variable_cell_vectors == 0: 

258 assert np.allclose(cell, self.slab.get_cell()) 

259 

260 # Make the smaller 'box' in which we are 

261 # allowed to place the atoms and which will 

262 # then be repeated to fill the 'full' unit cell 

263 box = np.copy(cell) 

264 for i in range(self.number_of_variable_cell_vectors, 3): 

265 box[i] = np.array(self.box_to_place_in[1][i]) 

266 box /= np.array([repeat]).T 

267 

268 # Here we gather the (reduced) number of blocks 

269 # to put in the smaller box, and the 'surplus' 

270 # occurring when the block count is not divisible 

271 # by the number of repetitions. 

272 # E.g. if we have a ('Ti', 4) block and do a 

273 # [2, 3, 1] repetition, we employ a ('Ti', 1) 

274 # block in the smaller box and delete 2 out 6 

275 # Ti atoms afterwards 

276 nrep = int(np.prod(repeat)) 

277 blocks, ids, surplus = [], [], [] 

278 for i, (block, count) in enumerate(self.blocks): 

279 count_part = int(np.ceil(count * 1. / nrep)) 

280 blocks.extend([block] * count_part) 

281 surplus.append(nrep * count_part - count) 

282 ids.extend([i] * count_part) 

283 

284 N_blocks = len(blocks) 

285 

286 # Shuffle the ordering so different blocks 

287 # are added in random order 

288 order = np.arange(N_blocks) 

289 self.rng.shuffle(order) 

290 blocks = [blocks[i] for i in order] 

291 ids = np.array(ids)[order] 

292 

293 # Add blocks one by one until we have found 

294 # a valid candidate 

295 blmin = self.blmin 

296 blmin_too_far = {key: 2 * val for key, val in blmin.items()} 

297 

298 niter = 0 

299 while maxiter is None or niter < maxiter: 

300 cand = Atoms('', cell=box, pbc=pbc) 

301 

302 for i in range(N_blocks): 

303 atoms = blocks[i].copy() 

304 atoms.set_tags(i) 

305 atoms.set_pbc(pbc) 

306 atoms.set_cell(box, scale_atoms=False) 

307 

308 while maxiter is None or niter < maxiter: 

309 niter += 1 

310 cop = atoms.get_positions().mean(axis=0) 

311 pos = np.dot(self.rng.random((1, 3)), box) 

312 atoms.translate(pos - cop) 

313 

314 if len(atoms) > 1: 

315 # Apply a random rotation to multi-atom blocks 

316 phi, theta, psi = 360 * self.rng.random(3) 

317 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi, 

318 center=pos) 

319 

320 if not atoms_too_close_two_sets(cand, atoms, blmin): 

321 cand += atoms 

322 break 

323 else: 

324 # Reached maximum iteration number 

325 # Break out of the for loop above 

326 cand = None 

327 break 

328 

329 if cand is None: 

330 # Exit the main while loop 

331 break 

332 

333 # Rebuild the candidate after repeating, 

334 # randomly deleting surplus blocks and 

335 # sorting back to the original order 

336 cand_full = cand.repeat(repeat) 

337 

338 tags_full = cand_full.get_tags() 

339 for i in range(nrep): 

340 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks 

341 cand_full.set_tags(tags_full) 

342 

343 cand = Atoms('', cell=cell, pbc=pbc) 

344 ids_full = np.tile(ids, nrep) 

345 

346 tag_counter = 0 

347 if len(self.slab) > 0: 

348 tag_counter = int(max(self.slab.get_tags())) + 1 

349 

350 for i, (block, count) in enumerate(self.blocks): 

351 tags = np.where(ids_full == i)[0] 

352 bad = self.rng.choice(tags, size=surplus[i], replace=False) 

353 for tag in tags: 

354 if tag not in bad: 

355 select = [a.index for a in cand_full if a.tag == tag] 

356 atoms = cand_full[select] # is indeed a copy! 

357 atoms.set_tags(tag_counter) 

358 assert len(atoms) == len(block) 

359 cand += atoms 

360 tag_counter += 1 

361 

362 for i in range(self.number_of_variable_cell_vectors, 3): 

363 cand.positions[:, i] += self.box_to_place_in[0][i] 

364 

365 # By construction, the minimal interatomic distances 

366 # within the structure should already be respected 

367 assert not atoms_too_close(cand, blmin, use_tags=True), \ 

368 'This is not supposed to happen; please report this bug' 

369 

370 if self.test_dist_to_slab and len(self.slab) > 0: 

371 if atoms_too_close_two_sets(self.slab, cand, blmin): 

372 continue 

373 

374 if self.test_too_far: 

375 tags = cand.get_tags() 

376 

377 for tag in np.unique(tags): 

378 too_far = True 

379 indices_i = np.where(tags == tag)[0] 

380 indices_j = np.where(tags != tag)[0] 

381 too_far = not atoms_too_close_two_sets(cand[indices_i], 

382 cand[indices_j], 

383 blmin_too_far) 

384 

385 if too_far and len(self.slab) > 0: 

386 # the block is too far from the rest 

387 # but might still be sufficiently 

388 # close to the slab 

389 too_far = not atoms_too_close_two_sets(cand[indices_i], 

390 self.slab, 

391 blmin_too_far) 

392 if too_far: 

393 break 

394 else: 

395 too_far = False 

396 

397 if too_far: 

398 continue 

399 

400 # Passed all the tests 

401 cand = self.slab + cand 

402 cand.set_cell(cell, scale_atoms=False) 

403 break 

404 else: 

405 # Reached max iteration count in the while loop 

406 return None 

407 

408 return cand 

409 

410 def generate_unit_cell(self, repeat): 

411 """Generates a random unit cell. 

412 

413 For this, we use the vectors in self.slab.cell 

414 in the fixed directions and randomly generate 

415 the variable ones. For such a cell to be valid, 

416 it has to satisfy the self.cellbounds constraints. 

417 

418 The cell will also be such that the volume of the 

419 box in which the atoms can be placed (box limits 

420 described by self.box_to_place_in) is equal to 

421 self.box_volume. 

422 

423 Parameters: 

424 

425 repeat: tuple of 3 integers 

426 Indicates by how much each cell vector 

427 will later be reduced by cell splitting. 

428 

429 This is used to ensure that the original 

430 cell is large enough so that the cell lengths 

431 of the smaller cell exceed the largest 

432 (X,X)-minimal-interatomic-distance in self.blmin. 

433 """ 

434 # Find the minimal cell length 'Lmin' 

435 # that we need in order to ensure that 

436 # an added atom or molecule will never 

437 # be 'too close to itself' 

438 Lmin = 0. 

439 for atoms, count in self.blocks: 

440 dist = atoms.get_all_distances(mic=False, vector=False) 

441 num = atoms.get_atomic_numbers() 

442 

443 for i in range(len(atoms)): 

444 dist[i, i] += self.blmin[(num[i], num[i])] 

445 for j in range(i): 

446 bl = self.blmin[(num[i], num[j])] 

447 dist[i, j] += bl 

448 dist[j, i] += bl 

449 

450 L = np.max(dist) 

451 if L > Lmin: 

452 Lmin = L 

453 

454 # Generate a suitable unit cell 

455 valid = False 

456 while not valid: 

457 cell = np.zeros((3, 3)) 

458 

459 for i in range(self.number_of_variable_cell_vectors): 

460 # on-diagonal values 

461 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume) 

462 cell[i, i] *= repeat[i] 

463 for j in range(i): 

464 # off-diagonal values 

465 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1] 

466 

467 # volume scaling 

468 for i in range(self.number_of_variable_cell_vectors, 3): 

469 cell[i] = self.box_to_place_in[1][i] 

470 

471 if self.number_of_variable_cell_vectors > 0: 

472 volume = abs(np.linalg.det(cell)) 

473 scaling = self.box_volume / volume 

474 scaling **= 1. / self.number_of_variable_cell_vectors 

475 cell[:self.number_of_variable_cell_vectors] *= scaling 

476 

477 for i in range(self.number_of_variable_cell_vectors, 3): 

478 cell[i] = self.slab.get_cell()[i] 

479 

480 # bounds checking 

481 valid = True 

482 

483 if self.cellbounds is not None: 

484 if not self.cellbounds.is_within_bounds(cell): 

485 valid = False 

486 

487 if valid: 

488 for i in range(3): 

489 if np.linalg.norm(cell[i]) < repeat[i] * Lmin: 

490 assert self.number_of_variable_cell_vectors > 0 

491 valid = False 

492 

493 return cell