Coverage for /builds/ase/ase/ase/ga/startgenerator.py : 95.17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Tools for generating new random starting candidates."""
2import numpy as np
3from ase import Atoms
4from ase.data import atomic_numbers
5from ase.build import molecule
6from ase.ga.utilities import (closest_distances_generator, atoms_too_close,
7 atoms_too_close_two_sets)
10class StartGenerator:
11 """Class for generating random starting candidates.
13 Its basic task consists of randomly placing atoms or
14 molecules within a predescribed box, while respecting
15 certain minimal interatomic distances.
17 Depending on the problem at hand, certain box vectors
18 may not be known or chosen beforehand, and hence also
19 need to be generated at random. Common cases include
20 bulk crystals, films and chains, with respectively
21 3, 2 and 1 unknown cell vectors.
23 Parameters:
25 slab: Atoms object
26 Specifies the cell vectors and periodic boundary conditions
27 to be applied to the randomly generated structures.
28 Any included atoms (e.g. representing an underlying slab)
29 are copied to these new structures.
30 Variable cell vectors (see number_of_variable_cell_vectors)
31 will be ignored because these will be generated at random.
33 blocks: list
34 List of building units for the structure. Each item can be:
36 * an integer: representing a single atom by its atomic number,
37 * a string: for a single atom (a chemical symbol) or a
38 molecule (name recognized by ase.build.molecule),
39 * an Atoms object,
40 * an (A, B) tuple or list where A is any of the above
41 and B is the number of A units to include.
43 A few examples:
45 >>> blocks = ['Ti'] * 4 + ['O'] * 8
46 >>> blocks = [('Ti', 4), ('O', 8)]
47 >>> blocks = [('CO2', 3)] # 3 CO2 molecules
48 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])
49 >>> blocks = [(co, 3)]
51 Each individual block (single atom or molecule) in the
52 randomly generated candidates is given a unique integer
53 tag. These can be used to preserve the molecular identity
54 of these subunits.
56 blmin: dict or float
57 Dictionary with minimal interatomic distances.
58 If a number is provided instead, the dictionary will
59 be generated with this ratio of covalent bond radii.
60 Note: when preserving molecular identity (see use_tags),
61 the blmin dict will (naturally) only be applied
62 to intermolecular distances (not the intramolecular
63 ones).
65 number_of_variable_cell_vectors: int (default 0)
66 The number of variable cell vectors (0, 1, 2 or 3).
67 To keep things simple, it is the 'first' vectors which
68 will be treated as variable, i.e. the 'a' vector in the
69 univariate case, the 'a' and 'b' vectors in the bivariate
70 case, etc.
72 box_to_place_in: [list, list of lists] (default None)
73 The box in which the atoms can be placed.
74 The default (None) means the box is equal to the
75 entire unit cell of the 'slab' object.
76 In many cases, however, smaller boxes are desired
77 (e.g. for adsorbates on a slab surface or for isolated
78 clusters). Then, box_to_place_in can be set as
79 [p0, [v1, v2, v3]] with positions being generated as
80 p0 + r1 * v1 + r2 * v2 + r3 + v3.
81 In case of one or more variable cell vectors,
82 the corresponding items in p0/v1/v2/v3 will be ignored.
84 box_volume: int or float or None (default)
85 Initial guess for the box volume in cubic Angstrom
86 (used in generating the variable cell vectors).
87 Typical values in the solid state are 8-12 A^3 per atom.
88 If there are no variable cell vectors, the default None
89 is required (box volume equal to the box_to_place_in
90 volume).
92 splits: dict or None
93 Splitting scheme for increasing the translational symmetry
94 in the random candidates, based on:
96 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__
98 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007
100 This should be a dict specifying the relative probabilities
101 for each split, written as tuples. For example,
103 >>> splits = {(2,): 3, (1,): 1}
105 This means that, for each structure, either a splitting
106 factor of 2 is applied to one randomly chosen axis,
107 or a splitting factor of 1 is applied (i.e., no splitting).
108 The probability ratio of the two scenararios will be 3:1,
109 i.e. 75% chance for the former and 25% chance for the latter
110 splitting scheme. Only the directions in which the 'slab'
111 object is periodic are eligible for splitting.
113 To e.g. always apply splitting factors of 2 and 3 along two
114 randomly chosen axes:
116 >>> splits = {(2, 3): 1}
118 By default, no splitting is applied (splits = None = {(1,): 1}).
120 cellbounds: ase.ga.utilities.CellBounds instance
121 Describing limits on the cell shape, see
122 :class:`~ase.ga.utilities.CellBounds`.
123 Note that it only make sense to impose conditions
124 regarding cell vectors which have been marked as
125 variable (see number_of_variable_cell_vectors).
127 test_dist_to_slab: bool (default True)
128 Whether to make sure that the distances between
129 the atoms and the slab satisfy the blmin.
131 test_too_far: bool (default True)
132 Whether to also make sure that there are no isolated
133 atoms or molecules with nearest-neighbour bond lengths
134 larger than 2x the value in the blmin dict.
136 rng: Random number generator
137 By default numpy.random.
138 """
140 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,
141 box_to_place_in=None, box_volume=None, splits=None,
142 cellbounds=None, test_dist_to_slab=True, test_too_far=True,
143 rng=np.random):
145 self.slab = slab
147 self.blocks = []
148 for item in blocks:
149 if isinstance(item, tuple) or isinstance(item, list):
150 assert len(item) == 2, 'Item length %d != 2' % len(item)
151 block, count = item
152 else:
153 block, count = item, 1
155 # Convert block into Atoms object
156 if isinstance(block, Atoms):
157 pass
158 elif block in atomic_numbers:
159 block = Atoms(block)
160 elif isinstance(block, str):
161 block = molecule(block)
162 elif block in atomic_numbers.values():
163 block = Atoms(numbers=[block])
164 else:
165 raise ValueError('Cannot parse this block:', block)
167 # Add to self.blocks, taking into account that
168 # we want to group the same blocks together.
169 # This is important for the cell splitting.
170 for i, (b, c) in enumerate(self.blocks):
171 if block == b:
172 self.blocks[i][1] += count
173 break
174 else:
175 self.blocks.append([block, count])
177 if isinstance(blmin, dict):
178 self.blmin = blmin
179 else:
180 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])
181 self.blmin = closest_distances_generator(
182 numbers,
183 ratio_of_covalent_radii=blmin)
185 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors
186 assert self.number_of_variable_cell_vectors in range(4)
187 if len(self.slab) > 0:
188 msg = 'Including atoms in the slab only makes sense'
189 msg += ' if there are no variable unit cell vectors'
190 assert self.number_of_variable_cell_vectors == 0, msg
191 for i in range(self.number_of_variable_cell_vectors):
192 msg = 'Unit cell %s-vector is marked as variable ' % ('abc'[i])
193 msg += 'and slab must then also be periodic in this direction'
194 assert self.slab.pbc[i], msg
196 if box_to_place_in is None:
197 p0 = np.array([0., 0., 0.])
198 cell = self.slab.get_cell()
199 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]
200 else:
201 self.box_to_place_in = box_to_place_in
203 if box_volume is None:
204 assert self.number_of_variable_cell_vectors == 0
205 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))
206 else:
207 assert self.number_of_variable_cell_vectors > 0
208 self.box_volume = box_volume
209 assert self.box_volume > 0
211 if splits is None:
212 splits = {(1,): 1}
213 tot = sum([v for v in splits.values()]) # normalization
214 self.splits = {k: v * 1. / tot for k, v in splits.items()}
216 self.cellbounds = cellbounds
217 self.test_too_far = test_too_far
218 self.test_dist_to_slab = test_dist_to_slab
219 self.rng = rng
221 def get_new_candidate(self, maxiter=None):
222 """Returns a new candidate.
224 maxiter: upper bound on the total number of times
225 the random position generator is called
226 when generating the new candidate.
228 By default (maxiter=None) no such bound
229 is imposed. If the generator takes too
230 long time to create a new candidate, it
231 may be suitable to specify a finite value.
232 When the bound is exceeded, None is returned.
233 """
234 pbc = self.slab.get_pbc()
236 # Choose cell splitting
237 r = self.rng.random()
238 cumprob = 0
239 for split, prob in self.splits.items():
240 cumprob += prob
241 if cumprob > r:
242 break
244 # Choose direction(s) along which to split
245 # and by how much
246 directions = [i for i in range(3) if pbc[i]]
247 repeat = [1, 1, 1]
248 if len(directions) > 0:
249 for number in split:
250 d = self.rng.choice(directions)
251 repeat[d] = number
252 repeat = tuple(repeat)
254 # Generate the 'full' unit cell
255 # for the eventual candidates
256 cell = self.generate_unit_cell(repeat)
257 if self.number_of_variable_cell_vectors == 0:
258 assert np.allclose(cell, self.slab.get_cell())
260 # Make the smaller 'box' in which we are
261 # allowed to place the atoms and which will
262 # then be repeated to fill the 'full' unit cell
263 box = np.copy(cell)
264 for i in range(self.number_of_variable_cell_vectors, 3):
265 box[i] = np.array(self.box_to_place_in[1][i])
266 box /= np.array([repeat]).T
268 # Here we gather the (reduced) number of blocks
269 # to put in the smaller box, and the 'surplus'
270 # occurring when the block count is not divisible
271 # by the number of repetitions.
272 # E.g. if we have a ('Ti', 4) block and do a
273 # [2, 3, 1] repetition, we employ a ('Ti', 1)
274 # block in the smaller box and delete 2 out 6
275 # Ti atoms afterwards
276 nrep = int(np.prod(repeat))
277 blocks, ids, surplus = [], [], []
278 for i, (block, count) in enumerate(self.blocks):
279 count_part = int(np.ceil(count * 1. / nrep))
280 blocks.extend([block] * count_part)
281 surplus.append(nrep * count_part - count)
282 ids.extend([i] * count_part)
284 N_blocks = len(blocks)
286 # Shuffle the ordering so different blocks
287 # are added in random order
288 order = np.arange(N_blocks)
289 self.rng.shuffle(order)
290 blocks = [blocks[i] for i in order]
291 ids = np.array(ids)[order]
293 # Add blocks one by one until we have found
294 # a valid candidate
295 blmin = self.blmin
296 blmin_too_far = {key: 2 * val for key, val in blmin.items()}
298 niter = 0
299 while maxiter is None or niter < maxiter:
300 cand = Atoms('', cell=box, pbc=pbc)
302 for i in range(N_blocks):
303 atoms = blocks[i].copy()
304 atoms.set_tags(i)
305 atoms.set_pbc(pbc)
306 atoms.set_cell(box, scale_atoms=False)
308 while maxiter is None or niter < maxiter:
309 niter += 1
310 cop = atoms.get_positions().mean(axis=0)
311 pos = np.dot(self.rng.random((1, 3)), box)
312 atoms.translate(pos - cop)
314 if len(atoms) > 1:
315 # Apply a random rotation to multi-atom blocks
316 phi, theta, psi = 360 * self.rng.random(3)
317 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,
318 center=pos)
320 if not atoms_too_close_two_sets(cand, atoms, blmin):
321 cand += atoms
322 break
323 else:
324 # Reached maximum iteration number
325 # Break out of the for loop above
326 cand = None
327 break
329 if cand is None:
330 # Exit the main while loop
331 break
333 # Rebuild the candidate after repeating,
334 # randomly deleting surplus blocks and
335 # sorting back to the original order
336 cand_full = cand.repeat(repeat)
338 tags_full = cand_full.get_tags()
339 for i in range(nrep):
340 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks
341 cand_full.set_tags(tags_full)
343 cand = Atoms('', cell=cell, pbc=pbc)
344 ids_full = np.tile(ids, nrep)
346 tag_counter = 0
347 if len(self.slab) > 0:
348 tag_counter = int(max(self.slab.get_tags())) + 1
350 for i, (block, count) in enumerate(self.blocks):
351 tags = np.where(ids_full == i)[0]
352 bad = self.rng.choice(tags, size=surplus[i], replace=False)
353 for tag in tags:
354 if tag not in bad:
355 select = [a.index for a in cand_full if a.tag == tag]
356 atoms = cand_full[select] # is indeed a copy!
357 atoms.set_tags(tag_counter)
358 assert len(atoms) == len(block)
359 cand += atoms
360 tag_counter += 1
362 for i in range(self.number_of_variable_cell_vectors, 3):
363 cand.positions[:, i] += self.box_to_place_in[0][i]
365 # By construction, the minimal interatomic distances
366 # within the structure should already be respected
367 assert not atoms_too_close(cand, blmin, use_tags=True), \
368 'This is not supposed to happen; please report this bug'
370 if self.test_dist_to_slab and len(self.slab) > 0:
371 if atoms_too_close_two_sets(self.slab, cand, blmin):
372 continue
374 if self.test_too_far:
375 tags = cand.get_tags()
377 for tag in np.unique(tags):
378 too_far = True
379 indices_i = np.where(tags == tag)[0]
380 indices_j = np.where(tags != tag)[0]
381 too_far = not atoms_too_close_two_sets(cand[indices_i],
382 cand[indices_j],
383 blmin_too_far)
385 if too_far and len(self.slab) > 0:
386 # the block is too far from the rest
387 # but might still be sufficiently
388 # close to the slab
389 too_far = not atoms_too_close_two_sets(cand[indices_i],
390 self.slab,
391 blmin_too_far)
392 if too_far:
393 break
394 else:
395 too_far = False
397 if too_far:
398 continue
400 # Passed all the tests
401 cand = self.slab + cand
402 cand.set_cell(cell, scale_atoms=False)
403 break
404 else:
405 # Reached max iteration count in the while loop
406 return None
408 return cand
410 def generate_unit_cell(self, repeat):
411 """Generates a random unit cell.
413 For this, we use the vectors in self.slab.cell
414 in the fixed directions and randomly generate
415 the variable ones. For such a cell to be valid,
416 it has to satisfy the self.cellbounds constraints.
418 The cell will also be such that the volume of the
419 box in which the atoms can be placed (box limits
420 described by self.box_to_place_in) is equal to
421 self.box_volume.
423 Parameters:
425 repeat: tuple of 3 integers
426 Indicates by how much each cell vector
427 will later be reduced by cell splitting.
429 This is used to ensure that the original
430 cell is large enough so that the cell lengths
431 of the smaller cell exceed the largest
432 (X,X)-minimal-interatomic-distance in self.blmin.
433 """
434 # Find the minimal cell length 'Lmin'
435 # that we need in order to ensure that
436 # an added atom or molecule will never
437 # be 'too close to itself'
438 Lmin = 0.
439 for atoms, count in self.blocks:
440 dist = atoms.get_all_distances(mic=False, vector=False)
441 num = atoms.get_atomic_numbers()
443 for i in range(len(atoms)):
444 dist[i, i] += self.blmin[(num[i], num[i])]
445 for j in range(i):
446 bl = self.blmin[(num[i], num[j])]
447 dist[i, j] += bl
448 dist[j, i] += bl
450 L = np.max(dist)
451 if L > Lmin:
452 Lmin = L
454 # Generate a suitable unit cell
455 valid = False
456 while not valid:
457 cell = np.zeros((3, 3))
459 for i in range(self.number_of_variable_cell_vectors):
460 # on-diagonal values
461 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume)
462 cell[i, i] *= repeat[i]
463 for j in range(i):
464 # off-diagonal values
465 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1]
467 # volume scaling
468 for i in range(self.number_of_variable_cell_vectors, 3):
469 cell[i] = self.box_to_place_in[1][i]
471 if self.number_of_variable_cell_vectors > 0:
472 volume = abs(np.linalg.det(cell))
473 scaling = self.box_volume / volume
474 scaling **= 1. / self.number_of_variable_cell_vectors
475 cell[:self.number_of_variable_cell_vectors] *= scaling
477 for i in range(self.number_of_variable_cell_vectors, 3):
478 cell[i] = self.slab.get_cell()[i]
480 # bounds checking
481 valid = True
483 if self.cellbounds is not None:
484 if not self.cellbounds.is_within_bounds(cell):
485 valid = False
487 if valid:
488 for i in range(3):
489 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:
490 assert self.number_of_variable_cell_vectors > 0
491 valid = False
493 return cell