Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" Implementation of a population for maintaining a GA population and 

2proposing structures to pair. """ 

3from math import tanh, sqrt, exp 

4from operator import itemgetter 

5import numpy as np 

6 

7from ase.db.core import now 

8from ase.ga import get_raw_score 

9 

10 

11def count_looks_like(a, all_cand, comp): 

12 """Utility method for counting occurrences.""" 

13 n = 0 

14 for b in all_cand: 

15 if a.info['confid'] == b.info['confid']: 

16 continue 

17 if comp.looks_like(a, b): 

18 n += 1 

19 return n 

20 

21 

22class Population: 

23 """Population class which maintains the current population 

24 and proposes which candidates to pair together. 

25 

26 Parameters: 

27 

28 data_connection: DataConnection object 

29 Bla bla bla. 

30 

31 population_size: int 

32 The number of candidates in the population. 

33 

34 comparator: Comparator object 

35 this will tell if two configurations are equal. 

36 Default compare atoms objects directly. 

37 

38 logfile: str 

39 Text file that contains information about the population 

40 The format is:: 

41 

42 timestamp: generation(if available): id1,id2,id3... 

43 

44 Using this file greatly speeds up convergence checks. 

45 Default None meaning that no file is written. 

46 

47 use_extinct: boolean 

48 Set this to True if mass extinction and the extinct key 

49 are going to be used. Default is False. 

50 

51 rng: Random number generator 

52 By default numpy.random. 

53 """ 

54 

55 def __init__(self, data_connection, population_size, 

56 comparator=None, logfile=None, use_extinct=False, 

57 rng=np.random): 

58 self.dc = data_connection 

59 self.pop_size = population_size 

60 if comparator is None: 

61 from ase.ga.standard_comparators import AtomsComparator 

62 comparator = AtomsComparator() 

63 self.comparator = comparator 

64 self.logfile = logfile 

65 self.use_extinct = use_extinct 

66 self.rng = rng 

67 self.pop = [] 

68 self.pairs = None 

69 self.all_cand = None 

70 self.__initialize_pop__() 

71 

72 def __initialize_pop__(self): 

73 """ Private method that initializes the population when 

74 the population is created. """ 

75 

76 # Get all relaxed candidates from the database 

77 ue = self.use_extinct 

78 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

79 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'], 

80 reverse=True) 

81 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

82 

83 # Fill up the population with the self.pop_size most stable 

84 # unique candidates. 

85 i = 0 

86 while i < len(all_cand) and len(self.pop) < self.pop_size: 

87 c = all_cand[i] 

88 i += 1 

89 eq = False 

90 for a in self.pop: 

91 if self.comparator.looks_like(a, c): 

92 eq = True 

93 break 

94 if not eq: 

95 self.pop.append(c) 

96 

97 for a in self.pop: 

98 a.info['looks_like'] = count_looks_like(a, all_cand, 

99 self.comparator) 

100 

101 self.all_cand = all_cand 

102 self.__calc_participation__() 

103 

104 def __calc_participation__(self): 

105 """ Determines, from the database, how many times each 

106 candidate has been used to generate new candidates. """ 

107 (participation, pairs) = self.dc.get_participation_in_pairing() 

108 for a in self.pop: 

109 if a.info['confid'] in participation.keys(): 

110 a.info['n_paired'] = participation[a.info['confid']] 

111 else: 

112 a.info['n_paired'] = 0 

113 self.pairs = pairs 

114 

115 def update(self, new_cand=None): 

116 """ New candidates can be added to the database 

117 after the population object has been created. 

118 This method extracts these new candidates from the 

119 database and includes them in the population. """ 

120 

121 if len(self.pop) == 0: 

122 self.__initialize_pop__() 

123 

124 if new_cand is None: 

125 ue = self.use_extinct 

126 new_cand = self.dc.get_all_relaxed_candidates(only_new=True, 

127 use_extinct=ue) 

128 

129 for a in new_cand: 

130 self.__add_candidate__(a) 

131 self.all_cand.append(a) 

132 self.__calc_participation__() 

133 self._write_log() 

134 

135 def get_current_population(self): 

136 """ Returns a copy of the current population. """ 

137 self.update() 

138 return [a.copy() for a in self.pop] 

139 

140 def get_population_after_generation(self, gen): 

141 """ Returns a copy of the population as it where 

142 after generation gen""" 

143 if self.logfile is not None: 

144 fd = open(self.logfile, 'r') 

145 gens = {} 

146 for line in fd: 

147 _, no, popul = line.split(':') 

148 gens[int(no)] = [int(i) for i in popul.split(',')] 

149 fd.close() 

150 return [c.copy() for c in self.all_cand[::-1] 

151 if c.info['relax_id'] in gens[gen]] 

152 

153 all_candidates = [c for c in self.all_cand 

154 if c.info['key_value_pairs']['generation'] <= gen] 

155 cands = [all_candidates[0]] 

156 for b in all_candidates: 

157 if b not in cands: 

158 for a in cands: 

159 if self.comparator.looks_like(a, b): 

160 break 

161 else: 

162 cands.append(b) 

163 pop = cands[:self.pop_size] 

164 return [a.copy() for a in pop] 

165 

166 def __add_candidate__(self, a): 

167 """ Adds a single candidate to the population. """ 

168 

169 # check if the structure is too low in raw score 

170 raw_score_a = get_raw_score(a) 

171 raw_score_worst = get_raw_score(self.pop[-1]) 

172 if raw_score_a < raw_score_worst \ 

173 and len(self.pop) == self.pop_size: 

174 return 

175 

176 # check if the new candidate should 

177 # replace a similar structure in the population 

178 for (i, b) in enumerate(self.pop): 

179 if self.comparator.looks_like(a, b): 

180 if get_raw_score(b) < raw_score_a: 

181 del self.pop[i] 

182 a.info['looks_like'] = count_looks_like(a, 

183 self.all_cand, 

184 self.comparator) 

185 self.pop.append(a) 

186 self.pop.sort(key=lambda x: get_raw_score(x), 

187 reverse=True) 

188 return 

189 

190 # the new candidate needs to be added, so remove the highest 

191 # energy one 

192 if len(self.pop) == self.pop_size: 

193 del self.pop[-1] 

194 

195 # add the new candidate 

196 a.info['looks_like'] = count_looks_like(a, 

197 self.all_cand, 

198 self.comparator) 

199 self.pop.append(a) 

200 self.pop.sort(key=lambda x: get_raw_score(x), reverse=True) 

201 

202 def __get_fitness__(self, indecies, with_history=True): 

203 """Calculates the fitness using the formula from 

204 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

205 

206 Sign change on the fitness compared to the formulation in the 

207 abovementioned paper due to maximizing raw_score instead of 

208 minimizing energy. (Set raw_score=-energy to optimize the energy) 

209 """ 

210 

211 scores = [get_raw_score(x) for x in self.pop] 

212 min_s = min(scores) 

213 max_s = max(scores) 

214 T = min_s - max_s 

215 if isinstance(indecies, int): 

216 indecies = [indecies] 

217 

218 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.)) 

219 for i in indecies] 

220 if with_history: 

221 M = [float(self.pop[i].info['n_paired']) for i in indecies] 

222 L = [float(self.pop[i].info['looks_like']) for i in indecies] 

223 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i]) 

224 for i in range(len(f))] 

225 return f 

226 

227 def get_two_candidates(self, with_history=True): 

228 """ Returns two candidates for pairing employing the 

229 fitness criteria from 

230 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

231 and the roulete wheel selection scheme described in 

232 R.L. Johnston Dalton Transactions, 

233 Vol. 22, No. 22. (2003), pp. 4193-4207 

234 """ 

235 

236 if len(self.pop) < 2: 

237 self.update() 

238 

239 if len(self.pop) < 2: 

240 return None 

241 

242 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

243 fmax = max(fit) 

244 c1 = self.pop[0] 

245 c2 = self.pop[0] 

246 used_before = False 

247 while c1.info['confid'] == c2.info['confid'] and not used_before: 

248 nnf = True 

249 while nnf: 

250 t = self.rng.randint(len(self.pop)) 

251 if fit[t] > self.rng.random() * fmax: 

252 c1 = self.pop[t] 

253 nnf = False 

254 nnf = True 

255 while nnf: 

256 t = self.rng.randint(len(self.pop)) 

257 if fit[t] > self.rng.random() * fmax: 

258 c2 = self.pop[t] 

259 nnf = False 

260 

261 c1id = c1.info['confid'] 

262 c2id = c2.info['confid'] 

263 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs 

264 return (c1.copy(), c2.copy()) 

265 

266 def get_one_candidate(self, with_history=True): 

267 """Returns one candidate for mutation employing the 

268 fitness criteria from 

269 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

270 and the roulete wheel selection scheme described in 

271 R.L. Johnston Dalton Transactions, 

272 Vol. 22, No. 22. (2003), pp. 4193-4207 

273 """ 

274 if len(self.pop) < 1: 

275 self.update() 

276 

277 if len(self.pop) < 1: 

278 return None 

279 

280 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

281 fmax = max(fit) 

282 nnf = True 

283 while nnf: 

284 t = self.rng.randint(len(self.pop)) 

285 if fit[t] > self.rng.random() * fmax: 

286 c1 = self.pop[t] 

287 nnf = False 

288 

289 return c1.copy() 

290 

291 def _write_log(self): 

292 """Writes the population to a logfile. 

293 

294 The format is:: 

295 

296 timestamp: generation(if available): id1,id2,id3...""" 

297 if self.logfile is not None: 

298 ids = [str(a.info['relax_id']) for a in self.pop] 

299 if ids != []: 

300 try: 

301 gen_nums = [c.info['key_value_pairs']['generation'] 

302 for c in self.all_cand] 

303 max_gen = max(gen_nums) 

304 except KeyError: 

305 max_gen = ' ' 

306 fd = open(self.logfile, 'a') 

307 fd.write('{time}: {gen}: {pop}\n'.format(time=now(), 

308 pop=','.join(ids), 

309 gen=max_gen)) 

310 fd.close() 

311 

312 def is_uniform(self, func, min_std, pop=None): 

313 """Tests whether the current population is uniform or diverse. 

314 Returns True if uniform, False otherwise. 

315 

316 Parameters: 

317 

318 func: function 

319 that takes one argument an atoms object and returns a value that 

320 will be used for testing against the rest of the population. 

321 

322 min_std: int or float 

323 The minimum standard deviation, if the population has a lower 

324 std dev it is uniform. 

325 

326 pop: list, optional 

327 use this list of Atoms objects instead of the current population. 

328 """ 

329 if pop is None: 

330 pop = self.pop 

331 vals = [func(a) for a in pop] 

332 stddev = np.std(vals) 

333 if stddev < min_std: 

334 return True 

335 return False 

336 

337 def mass_extinction(self, ids): 

338 """Kills every candidate in the database with gaid in the 

339 supplied list of ids. Typically used on the main part of the current 

340 population if the diversity is to small. 

341 

342 Parameters: 

343 

344 ids: list 

345 list of ids of candidates to be killed. 

346 

347 """ 

348 for confid in ids: 

349 self.dc.kill_candidate(confid) 

350 self.pop = [] 

351 

352 

353class RandomPopulation(Population): 

354 def __init__(self, data_connection, population_size, 

355 comparator=None, logfile=None, exclude_used_pairs=False, 

356 bad_candidates=0, use_extinct=False): 

357 self.exclude_used_pairs = exclude_used_pairs 

358 self.bad_candidates = bad_candidates 

359 Population.__init__(self, data_connection, population_size, 

360 comparator, logfile, use_extinct) 

361 

362 def __initialize_pop__(self): 

363 """ Private method that initializes the population when 

364 the population is created. """ 

365 

366 # Get all relaxed candidates from the database 

367 ue = self.use_extinct 

368 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

369 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

370 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

371 

372 if len(all_cand) > 0: 

373 # Fill up the population with the self.pop_size most stable 

374 # unique candidates. 

375 ratings = [] 

376 best_raw = get_raw_score(all_cand[0]) 

377 i = 0 

378 while i < len(all_cand): 

379 c = all_cand[i] 

380 i += 1 

381 eq = False 

382 for a in self.pop: 

383 if self.comparator.looks_like(a, c): 

384 eq = True 

385 break 

386 if not eq: 

387 if len(self.pop) < self.pop_size - self.bad_candidates: 

388 self.pop.append(c) 

389 else: 

390 exp_fact = exp(get_raw_score(c) / best_raw) 

391 ratings.append([c, (exp_fact - 1) * self.rng.random()]) 

392 ratings.sort(key=itemgetter(1), reverse=True) 

393 

394 for i in range(self.bad_candidates): 

395 self.pop.append(ratings[i][0]) 

396 

397 for a in self.pop: 

398 a.info['looks_like'] = count_looks_like(a, all_cand, 

399 self.comparator) 

400 

401 self.all_cand = all_cand 

402 self.__calc_participation__() 

403 

404 def update(self): 

405 """ The update method in Population will add to the end of 

406 the population, that can't be used here since we might have 

407 bad candidates that need to stay in the population, therefore 

408 just recalc the population every time. """ 

409 

410 self.pop = [] 

411 self.__initialize_pop__() 

412 

413 self._write_log() 

414 

415 def get_one_candidate(self): 

416 """Returns one candidates at random.""" 

417 if len(self.pop) < 1: 

418 self.update() 

419 

420 if len(self.pop) < 1: 

421 return None 

422 

423 t = self.rng.randint(len(self.pop)) 

424 c = self.pop[t] 

425 

426 return c.copy() 

427 

428 def get_two_candidates(self): 

429 """Returns two candidates at random.""" 

430 if len(self.pop) < 2: 

431 self.update() 

432 

433 if len(self.pop) < 2: 

434 return None 

435 

436 c1 = self.pop[0] 

437 c2 = self.pop[0] 

438 used_before = False 

439 while c1.info['confid'] == c2.info['confid'] and not used_before: 

440 t = self.rng.randint(len(self.pop)) 

441 c1 = self.pop[t] 

442 t = self.rng.randint(len(self.pop)) 

443 c2 = self.pop[t] 

444 

445 c1id = c1.info['confid'] 

446 c2id = c2.info['confid'] 

447 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and 

448 self.exclude_used_pairs) 

449 return (c1.copy(), c2.copy()) 

450 

451 

452class FitnessSharingPopulation(Population): 

453 """ Fitness sharing population that penalizes structures if they are 

454 too similar. This is determined by a distance measure 

455 

456 Parameters: 

457 

458 comp_key: string 

459 Key where the distance measure can be found in the 

460 atoms.info['key_value_pairs'] dictionary. 

461 

462 threshold: float or int 

463 Value above which no penalization of the fitness takes place 

464 

465 alpha_sh: float or int 

466 Determines the shape of the sharing function. 

467 Default is 1, which gives a linear sharing function. 

468 

469 """ 

470 

471 def __init__(self, data_connection, population_size, 

472 comp_key, threshold, alpha_sh=1., 

473 comparator=None, logfile=None, use_extinct=False): 

474 self.comp_key = comp_key 

475 self.dt = threshold # dissimilarity threshold 

476 self.alpha_sh = alpha_sh 

477 self.fit_scaling = 1. 

478 

479 self.sh_cache = dict() 

480 

481 Population.__init__(self, data_connection, population_size, 

482 comparator, logfile, use_extinct) 

483 

484 def __get_fitness__(self, candidates): 

485 """Input should be sorted according to raw_score.""" 

486 max_s = get_raw_score(candidates[0]) 

487 min_s = get_raw_score(candidates[-1]) 

488 T = min_s - max_s 

489 

490 shared_fit = [] 

491 for c in candidates: 

492 sc = get_raw_score(c) 

493 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.)) 

494 m = 1. 

495 ck = c.info['key_value_pairs'][self.comp_key] 

496 for other in candidates: 

497 if other != c: 

498 name = tuple(sorted([c.info['confid'], 

499 other.info['confid']])) 

500 if name not in self.sh_cache: 

501 ok = other.info['key_value_pairs'][self.comp_key] 

502 d = abs(ck - ok) 

503 if d < self.dt: 

504 v = 1 - (d / self.dt)**self.alpha_sh 

505 self.sh_cache[name] = v 

506 else: 

507 self.sh_cache[name] = 0 

508 m += self.sh_cache[name] 

509 

510 shf = (obj_fit ** self.fit_scaling) / m 

511 shared_fit.append(shf) 

512 return shared_fit 

513 

514 def update(self): 

515 """ The update method in Population will add to the end of 

516 the population, that can't be used here since the shared fitness 

517 will change for all candidates when new are added, therefore 

518 just recalc the population every time. """ 

519 

520 self.pop = [] 

521 self.__initialize_pop__() 

522 

523 self._write_log() 

524 

525 def __initialize_pop__(self): 

526 # Get all relaxed candidates from the database 

527 ue = self.use_extinct 

528 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

529 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

530 

531 if len(all_cand) > 0: 

532 shared_fit = self.__get_fitness__(all_cand) 

533 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand), 

534 reverse=True)))[1] 

535 

536 # Fill up the population with the self.pop_size most stable 

537 # unique candidates. 

538 i = 0 

539 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

540 c = all_sorted[i] 

541 i += 1 

542 eq = False 

543 for a in self.pop: 

544 if self.comparator.looks_like(a, c): 

545 eq = True 

546 break 

547 if not eq: 

548 self.pop.append(c) 

549 

550 for a in self.pop: 

551 a.info['looks_like'] = count_looks_like(a, all_cand, 

552 self.comparator) 

553 self.all_cand = all_cand 

554 

555 def get_two_candidates(self): 

556 """ Returns two candidates for pairing employing the 

557 fitness criteria from 

558 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

559 and the roulete wheel selection scheme described in 

560 R.L. Johnston Dalton Transactions, 

561 Vol. 22, No. 22. (2003), pp. 4193-4207 

562 """ 

563 

564 if len(self.pop) < 2: 

565 self.update() 

566 

567 if len(self.pop) < 2: 

568 return None 

569 

570 fit = self.__get_fitness__(self.pop) 

571 fmax = max(fit) 

572 c1 = self.pop[0] 

573 c2 = self.pop[0] 

574 while c1.info['confid'] == c2.info['confid']: 

575 nnf = True 

576 while nnf: 

577 t = self.rng.randint(len(self.pop)) 

578 if fit[t] > self.rng.random() * fmax: 

579 c1 = self.pop[t] 

580 nnf = False 

581 nnf = True 

582 while nnf: 

583 t = self.rng.randint(len(self.pop)) 

584 if fit[t] > self.rng.random() * fmax: 

585 c2 = self.pop[t] 

586 nnf = False 

587 

588 return (c1.copy(), c2.copy()) 

589 

590 

591class RankFitnessPopulation(Population): 

592 """ Ranks the fitness relative to set variable to flatten the surface 

593 in a certain direction such that mating across variable is equally 

594 likely irrespective of raw_score. 

595 

596 Parameters: 

597 

598 variable_function: function 

599 A function that takes as input an Atoms object and returns 

600 the variable that differentiates the ranks. 

601 

602 exp_function: boolean 

603 If True use an exponential function for ranking the fitness. 

604 If False use the same as in Population. Default True. 

605 

606 exp_prefactor: float 

607 The prefactor used in the exponential fitness scaling function. 

608 Default 0.5 

609 """ 

610 

611 def __init__(self, data_connection, population_size, variable_function, 

612 comparator=None, logfile=None, use_extinct=False, 

613 exp_function=True, exp_prefactor=0.5): 

614 self.exp_function = exp_function 

615 self.exp_prefactor = exp_prefactor 

616 self.vf = variable_function 

617 # The current fitness is set at each update of the population 

618 self.current_fitness = None 

619 

620 Population.__init__(self, data_connection, population_size, 

621 comparator, logfile, use_extinct) 

622 

623 def get_rank(self, rcand, key=None): 

624 # Set the initial order of the candidates, will need to 

625 # be returned in this order at the end of ranking. 

626 ordered = list(zip(range(len(rcand)), rcand)) 

627 

628 # Niche and rank candidates. 

629 rec_nic = [] 

630 rank_fit = [] 

631 for o, c in ordered: 

632 if o not in rec_nic: 

633 ntr = [] 

634 ce1 = self.vf(c) 

635 rec_nic.append(o) 

636 ntr.append([o, c]) 

637 for oother, cother in ordered: 

638 if oother not in rec_nic: 

639 ce2 = self.vf(cother) 

640 if ce1 == ce2: 

641 # put the now processed in oother 

642 # in rec_nic as well 

643 rec_nic.append(oother) 

644 ntr.append([oother, cother]) 

645 # Each niche is sorted according to raw_score and 

646 # assigned a fitness according to the ranking of 

647 # the candidates 

648 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key], 

649 reverse=True) 

650 start_rank = -1 

651 cor = 0 

652 for on, cn in ntr: 

653 rank = start_rank - cor 

654 rank_fit.append([on, cn, rank]) 

655 cor += 1 

656 # The original order is reformed 

657 rank_fit.sort(key=itemgetter(0), reverse=False) 

658 return np.array(list(zip(*rank_fit))[2]) 

659 

660 def __get_fitness__(self, candidates): 

661 expf = self.exp_function 

662 rfit = self.get_rank(candidates, key='raw_score') 

663 

664 if not expf: 

665 rmax = max(rfit) 

666 rmin = min(rfit) 

667 T = rmin - rmax 

668 # If using obj_rank probability, must have non-zero T val. 

669 # pop_size must be greater than number of permutations. 

670 # We test for this here 

671 msg = "Equal fitness for best and worst candidate in the " 

672 msg += "population! Fitness scaling is impossible! " 

673 msg += "Try with a larger population." 

674 assert T != 0., msg 

675 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.)) 

676 else: 

677 return self.exp_prefactor ** (-rfit - 1) 

678 

679 def update(self): 

680 """ The update method in Population will add to the end of 

681 the population, that can't be used here since the fitness 

682 will potentially change for all candidates when new are added, 

683 therefore just recalc the population every time. """ 

684 

685 self.pop = [] 

686 self.__initialize_pop__() 

687 self.current_fitness = self.__get_fitness__(self.pop) 

688 

689 self._write_log() 

690 

691 def __initialize_pop__(self): 

692 # Get all relaxed candidates from the database 

693 ue = self.use_extinct 

694 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

695 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

696 

697 if len(all_cand) > 0: 

698 fitf = self.__get_fitness__(all_cand) 

699 all_sorted = list(zip(fitf, all_cand)) 

700 all_sorted.sort(key=itemgetter(0), reverse=True) 

701 sort_cand = [] 

702 for _, t2 in all_sorted: 

703 sort_cand.append(t2) 

704 all_sorted = sort_cand 

705 

706 # Fill up the population with the self.pop_size most stable 

707 # unique candidates. 

708 i = 0 

709 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

710 c = all_sorted[i] 

711 c_vf = self.vf(c) 

712 i += 1 

713 eq = False 

714 for a in self.pop: 

715 a_vf = self.vf(a) 

716 # Only run comparator if the variable_function (self.vf) 

717 # returns the same. If it returns something different the 

718 # candidates are inherently different. 

719 # This is done to speed up. 

720 if a_vf == c_vf: 

721 if self.comparator.looks_like(a, c): 

722 eq = True 

723 break 

724 if not eq: 

725 self.pop.append(c) 

726 self.all_cand = all_cand 

727 

728 def get_two_candidates(self): 

729 """ Returns two candidates for pairing employing the 

730 roulete wheel selection scheme described in 

731 R.L. Johnston Dalton Transactions, 

732 Vol. 22, No. 22. (2003), pp. 4193-4207 

733 """ 

734 

735 if len(self.pop) < 2: 

736 self.update() 

737 

738 if len(self.pop) < 2: 

739 return None 

740 

741 # Use saved fitness 

742 fit = self.current_fitness 

743 fmax = max(fit) 

744 c1 = self.pop[0] 

745 c2 = self.pop[0] 

746 while c1.info['confid'] == c2.info['confid']: 

747 nnf = True 

748 while nnf: 

749 t = self.rng.randint(len(self.pop)) 

750 if fit[t] > self.rng.random() * fmax: 

751 c1 = self.pop[t] 

752 nnf = False 

753 nnf = True 

754 while nnf: 

755 t = self.rng.randint(len(self.pop)) 

756 if fit[t] > self.rng.random() * fmax: 

757 c2 = self.pop[t] 

758 nnf = False 

759 

760 return (c1.copy(), c2.copy()) 

761 

762 

763class MultiObjectivePopulation(RankFitnessPopulation): 

764 """ Allows for assignment of fitness based on a set of two variables 

765 such that fitness is ranked according to a Pareto-front of 

766 non-dominated candidates. 

767 

768 Parameters 

769 ---------- 

770 abs_data: list 

771 Set of key_value_pairs in atoms object for which fitness should 

772 be assigned based on absolute value. 

773 

774 rank_data: list 

775 Set of key_value_pairs in atoms object for which data should 

776 be ranked in order to ascribe fitness. 

777 

778 variable_function: function 

779 A function that takes as input an Atoms object and returns 

780 the variable that differentiates the ranks. Only use if 

781 data is ranked. 

782 

783 exp_function: boolean 

784 If True use an exponential function for ranking the fitness. 

785 If False use the same as in Population. Default True. 

786 

787 exp_prefactor: float 

788 The prefactor used in the exponential fitness scaling function. 

789 Default 0.5 

790 

791 """ 

792 

793 def __init__(self, data_connection, population_size, 

794 variable_function=None, comparator=None, logfile=None, 

795 use_extinct=False, abs_data=None, rank_data=None, 

796 exp_function=True, exp_prefactor=0.5): 

797 # The current fitness is set at each update of the population 

798 self.current_fitness = None 

799 

800 if rank_data is None: 

801 rank_data = [] 

802 self.rank_data = rank_data 

803 

804 if abs_data is None: 

805 abs_data = [] 

806 self.abs_data = abs_data 

807 

808 RankFitnessPopulation.__init__(self, data_connection, population_size, 

809 variable_function, comparator, logfile, 

810 use_extinct, exp_function, 

811 exp_prefactor) 

812 

813 def get_nonrank(self, nrcand, key=None): 

814 """"Returns a list of fitness values.""" 

815 nrc_list = [] 

816 for nrc in nrcand: 

817 nrc_list.append(nrc.info['key_value_pairs'][key]) 

818 return nrc_list 

819 

820 def __get_fitness__(self, candidates): 

821 # There are no defaults set for the datasets to be 

822 # used in this function, as such we test that the 

823 # user has specified at least two here. 

824 msg = "This is a multi-objective fitness function" 

825 msg += " so there must be at least two datasets" 

826 msg += " stated in the rank_data and abs_data variables" 

827 assert len(self.rank_data) + len(self.abs_data) >= 2, msg 

828 

829 expf = self.exp_function 

830 

831 all_fitnesses = [] 

832 used = set() 

833 for rd in self.rank_data: 

834 used.add(rd) 

835 # Build ranked fitness based on rd 

836 all_fitnesses.append(self.get_rank(candidates, key=rd)) 

837 

838 for d in self.abs_data: 

839 if d not in used: 

840 used.add(d) 

841 # Build fitness based on d 

842 all_fitnesses.append(self.get_nonrank(candidates, key=d)) 

843 

844 # Set the initial order of the ranks, will need to 

845 # be returned in this order at the end. 

846 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses)) 

847 mvf_rank = -1 # Start multi variable rank at -1. 

848 rec_vrc = [] # A record of already ranked candidates. 

849 mvf_list = [] # A list for all candidate ranks. 

850 # Sort by raw_score_1 in case this is different from 

851 # the stored raw_score() variable that all_cands are 

852 # sorted by. 

853 fordered.sort(key=itemgetter(1), reverse=True) 

854 # Niche candidates with equal or better raw_score to 

855 # current candidate. 

856 for a in fordered: 

857 order, rest = a[0], a[1:] 

858 if order not in rec_vrc: 

859 pff = [] 

860 pff2 = [] 

861 rec_vrc.append(order) 

862 pff.append((order, rest)) 

863 for b in fordered: 

864 border, brest = b[0], b[1:] 

865 if border not in rec_vrc: 

866 if np.any(np.array(brest) >= np.array(rest)): 

867 pff.append((border, brest)) 

868 # Remove any candidate from pff list that is dominated 

869 # by another in the list. 

870 for na in pff: 

871 norder, nrest = na[0], na[1:] 

872 dom = False 

873 for nb in pff: 

874 nborder, nbrest = nb[0], nb[1:] 

875 if norder != nborder: 

876 if np.all(np.array(nbrest) > np.array(nrest)): 

877 dom = True 

878 if not dom: 

879 pff2.append((norder, nrest)) 

880 # Assign pareto rank from -1 to -N niches. 

881 for ffa in pff2: 

882 fforder, ffrest = ffa[0], ffa[1:] 

883 rec_vrc.append(fforder) 

884 mvf_list.append((fforder, mvf_rank, ffrest)) 

885 mvf_rank = mvf_rank - 1 

886 # The original order is reformed 

887 mvf_list.sort(key=itemgetter(0), reverse=False) 

888 rfro = np.array(list(zip(*mvf_list))[1]) 

889 

890 if not expf: 

891 rmax = max(rfro) 

892 rmin = min(rfro) 

893 T = rmin - rmax 

894 # If using obj_rank probability, must have non-zero T val. 

895 # pop_size must be greater than number of permutations. 

896 # We test for this here 

897 msg = "Equal fitness for best and worst candidate in the " 

898 msg += "population! Fitness scaling is impossible! " 

899 msg += "Try with a larger population." 

900 assert T != 0., msg 

901 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.)) 

902 else: 

903 return self.exp_prefactor ** (-rfro - 1) 

904 

905 def __initialize_pop__(self): 

906 # Get all relaxed candidates from the database 

907 ue = self.use_extinct 

908 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

909 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

910 

911 if len(all_cand) > 0: 

912 fitf = self.__get_fitness__(all_cand) 

913 all_sorted = list(zip(fitf, all_cand)) 

914 all_sorted.sort(key=itemgetter(0), reverse=True) 

915 sort_cand = [] 

916 for _, t2 in all_sorted: 

917 sort_cand.append(t2) 

918 all_sorted = sort_cand 

919 

920 # Fill up the population with the self.pop_size most stable 

921 # unique candidates. 

922 i = 0 

923 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

924 c = all_sorted[i] 

925 # Use variable_function to decide whether to run comparator 

926 # if the function has been defined by the user. This does not 

927 # need to be dependent on using the rank_data function. 

928 if self.vf is not None: 

929 c_vf = self.vf(c) 

930 i += 1 

931 eq = False 

932 for a in self.pop: 

933 if self.vf is not None: 

934 a_vf = self.vf(a) 

935 # Only run comparator if the variable_function 

936 # (self.vf) returns the same. If it returns something 

937 # different the candidates are inherently different. 

938 # This is done to speed up. 

939 if a_vf == c_vf: 

940 if self.comparator.looks_like(a, c): 

941 eq = True 

942 break 

943 else: 

944 if self.comparator.looks_like(a, c): 

945 eq = True 

946 break 

947 if not eq: 

948 self.pop.append(c) 

949 self.all_cand = all_cand